bfile/cfile.c

   1 /*
   2  * Copyright (C) 1995-2000, Index Data ApS
   3  * All rights reserved.
   4  *
   5  * $Log: cfile.c,v $
   6  * Revision 1.26  2000-03-20 19:08:35  adam
   7  * Added remote record import using Z39.50 extended services and Segment
   8  * Requests.
   9  *
  10  * Revision 1.25  1999/05/26 07:49:12  adam
  11  * C++ compilation.
  12  *
  13  * Revision 1.24  1999/05/12 13:08:06  adam
  14  * First version of ISAMS.
  15  *
  16  * Revision 1.23  1998/10/15 13:09:29  adam
  17  * Minor changes.
  18  *
  19  * Revision 1.22  1998/10/13 20:07:22  adam
  20  * Changed some log messages.
  21  *
  22  * Revision 1.21  1998/08/24 17:29:52  adam
  23  * Minor changes.
  24  *
  25  * Revision 1.20  1998/08/07 15:07:13  adam
   26  * Fixed bug in cf_commit_flat.
  27  *
  28  * Revision 1.19  1997/02/12 20:37:17  adam
  29  * Changed the messages logged. No real code changed.
  30  *
  31  * Revision 1.18  1996/10/29 13:56:15  adam
  32  * Include of zebrautl.h instead of alexutil.h.
  33  *
  34  * Revision 1.17  1996/04/19 16:49:00  adam
  35  * Minor changes.
  36  *
  37  * Revision 1.16  1996/04/19  16:23:47  adam
  38  * Serious bug fix in shadow implementation; function new_bucket might
  39  * set wrong bucket number on new bucket.
  40  *
  41  * Revision 1.15  1996/04/18  16:02:56  adam
  42  * Changed logging a bit.
   43  * Removed warning message when committing flat shadow files.
  44  *
  45  * Revision 1.14  1996/04/12  07:01:55  adam
  46  * Yet another bug fix (next_block was initialized to 0; now set to 1).
  47  *
  48  * Revision 1.13  1996/04/09 14:48:49  adam
  49  * Bug fix: offset calculation when using flat files was completely broken.
  50  *
  51  * Revision 1.12  1996/04/09  06:47:28  adam
  52  * Function scan_areadef doesn't use sscanf (%n fails on this Linux).
  53  *
  54  * Revision 1.11  1996/03/26 15:59:05  adam
  55  * The directory of the shadow table file can be specified by the new
  56  * bf_lockDir call.
  57  *
  58  * Revision 1.10  1996/02/07  14:03:46  adam
  59  * Work on flat indexed shadow files.
  60  *
  61  * Revision 1.9  1996/02/07  10:08:43  adam
  62  * Work on flat shadow (not finished yet).
  63  *
  64  * Revision 1.8  1995/12/15  12:36:52  adam
  65  * Moved hash file information to union.
  66  * Renamed commit files.
  67  *
  68  * Revision 1.7  1995/12/15  10:35:07  adam
  69  * Changed names of commit files.
  70  *
  71  * Revision 1.6  1995/12/11  09:03:53  adam
  72  * New function: cf_unlink.
  73  * New member of commit file head: state (0) deleted, (1) hash file.
  74  *
  75  * Revision 1.5  1995/12/08  16:21:14  adam
  76  * Work on commit/update.
  77  *
  78  * Revision 1.4  1995/12/01  16:24:28  adam
  79  * Commit files use separate meta file area.
  80  *
  81  * Revision 1.3  1995/12/01  11:37:22  adam
  82  * Cached/commit files implemented as meta-files.
  83  *
  84  * Revision 1.2  1995/11/30  17:00:49  adam
  85  * Several bug fixes. Commit system runs now.
  86  *
  87  * Revision 1.1  1995/11/30  08:33:11  adam
  88  * Started work on commit facility.
  89  *
  90  */
  91
  92 #include <assert.h>
  93 #include <stdlib.h>
  94 #include <string.h>
  95
  96 #include <zebrautl.h>
  97 #include <mfile.h>
  98 #include "cfile.h"
  99
 100 static int write_head (CFile cf)
 101 {
 102     int left = cf->head.hash_size * sizeof(int);
 103     int bno = 1;
 104     const char *tab = (char*) cf->array;
 105
 106     if (!tab)
 107         return 0;
 108     while (left >= (int) HASH_BSIZE)
 109     {
 110         mf_write (cf->hash_mf, bno++, 0, 0, tab);
 111         tab += HASH_BSIZE;
 112         left -= HASH_BSIZE;
 113     }
 114     if (left > 0)
 115         mf_write (cf->hash_mf, bno, 0, left, tab);
 116     return 0;
 117 }
 118
 119 static int read_head (CFile cf)
 120 {
 121     int left = cf->head.hash_size * sizeof(int);
 122     int bno = 1;
 123     char *tab = (char*) cf->array;
 124
 125     if (!tab)
 126         return 0;
 127     while (left >= (int) HASH_BSIZE)
 128     {
 129         mf_read (cf->hash_mf, bno++, 0, 0, tab);
 130         tab += HASH_BSIZE;
 131         left -= HASH_BSIZE;
 132     }
 133     if (left > 0)
 134         mf_read (cf->hash_mf, bno, 0, left, tab);
 135     return 0;
 136 }
 137
 138
 139 CFile cf_open (MFile mf, MFile_area area, const char *fname,
 140                int block_size, int wflag, int *firstp)
 141 {
 142     char path[1024];
 143     int i;
 144     CFile cf = (CFile) xmalloc (sizeof(*cf));
 145     int hash_bytes;
 146
 147     cf->rmf = mf;
 148     logf (LOG_DEBUG, "cf: open %s %s", cf->rmf->name, wflag ? "rdwr" : "rd");
 149     sprintf (path, "%s-b", fname);
 150     if (!(cf->block_mf = mf_open (area, path, block_size, wflag)))
 151     {
 152         logf (LOG_FATAL|LOG_ERRNO, "Failed to open %s", path);
 153         exit (1);
 154     }
 155     sprintf (path, "%s-i", fname);
 156     if (!(cf->hash_mf = mf_open (area, path, HASH_BSIZE, wflag)))
 157     {
 158         logf (LOG_FATAL|LOG_ERRNO, "Failed to open %s", path);
 159         exit (1);
 160     }
 161     assert (firstp);
 162     if (!mf_read (cf->hash_mf, 0, 0, sizeof(cf->head), &cf->head) ||
 163         !cf->head.state)
 164     {
 165         *firstp = 1;
 166         cf->head.state = 1;
 167         cf->head.block_size = block_size;
 168         cf->head.hash_size = 199;
 169         hash_bytes = cf->head.hash_size * sizeof(int);
 170         cf->head.flat_bucket = cf->head.next_bucket = cf->head.first_bucket =
 171             (hash_bytes+sizeof(cf->head))/HASH_BSIZE + 2;
 172         cf->head.next_block = 1;
 173         if (wflag)
 174             mf_write (cf->hash_mf, 0, 0, sizeof(cf->head), &cf->head);
 175         cf->array = (int *) xmalloc (hash_bytes);
 176         for (i = 0; i<cf->head.hash_size; i++)
 177             cf->array[i] = 0;
 178         if (wflag)
 179             write_head (cf);
 180     }
 181     else
 182     {
 183         *firstp = 0;
 184         assert (cf->head.block_size == block_size);
 185         assert (cf->head.hash_size > 2);
 186         hash_bytes = cf->head.hash_size * sizeof(int);
 187         assert (cf->head.next_bucket > 0);
 188         assert (cf->head.next_block > 0);
 189         if (cf->head.state == 1)
 190             cf->array = (int *) xmalloc (hash_bytes);
 191         else
 192             cf->array = NULL;
 193         read_head (cf);
 194     }
 195     if (cf->head.state == 1)
 196     {
 197         cf->parray = (struct CFile_hash_bucket **)
 198             xmalloc (cf->head.hash_size * sizeof(*cf->parray));
 199         for (i = 0; i<cf->head.hash_size; i++)
 200             cf->parray[i] = NULL;
 201     }
 202     else
 203         cf->parray = NULL;
 204     cf->bucket_lru_front = cf->bucket_lru_back = NULL;
 205     cf->bucket_in_memory = 0;
 206     cf->max_bucket_in_memory = 100;
 207     cf->dirty = 0;
 208     cf->iobuf = (char *) xmalloc (cf->head.block_size);
 209     memset (cf->iobuf, 0, cf->head.block_size);
 210     cf->no_hits = 0;
 211     cf->no_miss = 0;
 212     zebra_mutex_init (&cf->mutex);
 213     return cf;
 214 }
 215
 216 static int cf_hash (CFile cf, int no)
 217 {
 218     return (no>>3) % cf->head.hash_size;
 219 }
 220
 221 static void release_bucket (CFile cf, struct CFile_hash_bucket *p)
 222 {
 223     if (p->lru_prev)
 224         p->lru_prev->lru_next = p->lru_next;
 225     else
 226         cf->bucket_lru_back = p->lru_next;
 227     if (p->lru_next)
 228         p->lru_next->lru_prev = p->lru_prev;
 229     else
 230         cf->bucket_lru_front = p->lru_prev;
 231
 232     *p->h_prev = p->h_next;
 233     if (p->h_next)
 234         p->h_next->h_prev = p->h_prev;
 235
 236     --(cf->bucket_in_memory);
 237     xfree (p);
 238 }
 239
 240 static void flush_bucket (CFile cf, int no_to_flush)
 241 {
 242     int i;
 243     struct CFile_hash_bucket *p;
 244
 245     for (i = 0; i != no_to_flush; i++)
 246     {
 247         p = cf->bucket_lru_back;
 248         if (!p)
 249             break;
 250         if (p->dirty)
 251         {
 252             mf_write (cf->hash_mf, p->ph.this_bucket, 0, 0, &p->ph);
 253             cf->dirty = 1;
 254         }
 255         release_bucket (cf, p);
 256     }
 257 }
 258
 259 static struct CFile_hash_bucket *alloc_bucket (CFile cf, int block_no, int hno)
 260 {
 261     struct CFile_hash_bucket *p, **pp;
 262
 263     if (cf->bucket_in_memory == cf->max_bucket_in_memory)
 264         flush_bucket (cf, 1);
 265     assert (cf->bucket_in_memory < cf->max_bucket_in_memory);
 266     ++(cf->bucket_in_memory);
 267     p = (struct CFile_hash_bucket *) xmalloc (sizeof(*p));
 268
 269     p->lru_next = NULL;
 270     p->lru_prev = cf->bucket_lru_front;
 271     if (cf->bucket_lru_front)
 272         cf->bucket_lru_front->lru_next = p;
 273     else
 274         cf->bucket_lru_back = p;
 275     cf->bucket_lru_front = p;
 276
 277     pp = cf->parray + hno;
 278     p->h_next = *pp;
 279     p->h_prev = pp;
 280     if (*pp)
 281         (*pp)->h_prev = &p->h_next;
 282     *pp = p;
 283     return p;
 284 }
 285
 286 static struct CFile_hash_bucket *get_bucket (CFile cf, int block_no, int hno)
 287 {
 288     struct CFile_hash_bucket *p;
 289
 290     p = alloc_bucket (cf, block_no, hno);
 291     if (!mf_read (cf->hash_mf, block_no, 0, 0, &p->ph))
 292     {
 293         logf (LOG_FATAL|LOG_ERRNO, "read get_bucket");
 294         exit (1);
 295     }
 296     assert (p->ph.this_bucket == block_no);
 297     p->dirty = 0;
 298     return p;
 299 }
 300
 301 static struct CFile_hash_bucket *new_bucket (CFile cf, int *block_nop, int hno)
 302 {
 303     struct CFile_hash_bucket *p;
 304     int i, block_no;
 305
 306     block_no = *block_nop = cf->head.next_bucket++;
 307     p = alloc_bucket (cf, block_no, hno);
 308
 309     for (i = 0; i<HASH_BUCKET; i++)
 310     {
 311         p->ph.vno[i] = 0;
 312         p->ph.no[i] = 0;
 313     }
 314     p->ph.next_bucket = 0;
 315     p->ph.this_bucket = block_no;
 316     p->dirty = 1;
 317     return p;
 318 }
 319
 320 static int cf_lookup_flat (CFile cf, int no)
 321 {
 322     int hno = (no*sizeof(int))/HASH_BSIZE;
 323     int off = (no*sizeof(int)) - hno*HASH_BSIZE;
 324     int vno = 0;
 325
 326     mf_read (cf->hash_mf, hno+cf->head.next_bucket, off, sizeof(int), &vno);
 327     return vno;
 328 }
 329
 330 static int cf_lookup_hash (CFile cf, int no)
 331 {
 332     int hno = cf_hash (cf, no);
 333     struct CFile_hash_bucket *hb;
 334     int block_no, i;
 335
 336     for (hb = cf->parray[hno]; hb; hb = hb->h_next)
 337     {
 338         for (i = 0; i<HASH_BUCKET && hb->ph.vno[i]; i++)
 339             if (hb->ph.no[i] == no)
 340             {
 341                 (cf->no_hits)++;
 342                 return hb->ph.vno[i];
 343             }
 344     }
 345     for (block_no = cf->array[hno]; block_no; block_no = hb->ph.next_bucket)
 346     {
 347         for (hb = cf->parray[hno]; hb; hb = hb->h_next)
 348         {
 349             if (hb->ph.this_bucket == block_no)
 350                 break;
 351         }
 352         if (hb)
 353             continue;
 354 #if 0
 355         /* extra check ... */
 356         for (hb = cf->bucket_lru_back; hb; hb = hb->lru_next)
 357         {
 358             if (hb->ph.this_bucket == block_no)
 359             {
 360                 logf (LOG_FATAL, "Found hash bucket on other chain (1)");
 361                 abort ();
 362             }
 363             for (i = 0; i<HASH_BUCKET && hb->ph.vno[i]; i++)
 364                 if (hb->ph.no[i] == no)
 365                 {
 366                     logf (LOG_FATAL, "Found hash bucket on other chain (2)");
 367                     abort ();
 368                 }
 369         }
 370 #endif
 371         (cf->no_miss)++;
 372         hb = get_bucket (cf, block_no, hno);
 373         for (i = 0; i<HASH_BUCKET && hb->ph.vno[i]; i++)
 374             if (hb->ph.no[i] == no)
 375                 return hb->ph.vno[i];
 376     }
 377     return 0;
 378 }
 379
 380 static void cf_write_flat (CFile cf, int no, int vno)
 381 {
 382     int hno = (no*sizeof(int))/HASH_BSIZE;
 383     int off = (no*sizeof(int)) - hno*HASH_BSIZE;
 384
 385     hno += cf->head.next_bucket;
 386     if (hno >= cf->head.flat_bucket)
 387         cf->head.flat_bucket = hno+1;
 388     cf->dirty = 1;
 389     mf_write (cf->hash_mf, hno, off, sizeof(int), &vno);
 390 }
 391
 392 static void cf_moveto_flat (CFile cf)
 393 {
 394     struct CFile_hash_bucket *p;
 395     int i, j;
 396
 397     logf (LOG_DEBUG, "cf: Moving to flat shadow: %s", cf->rmf->name);
 398     logf (LOG_DEBUG, "cf: hits=%d miss=%d bucket_in_memory=%d total=%d",
 399         cf->no_hits, cf->no_miss, cf->bucket_in_memory,
 400         cf->head.next_bucket - cf->head.first_bucket);
 401     assert (cf->head.state == 1);
 402     flush_bucket (cf, -1);
 403     assert (cf->bucket_in_memory == 0);
 404     p = (struct CFile_hash_bucket *) xmalloc (sizeof(*p));
 405     for (i = cf->head.first_bucket; i < cf->head.next_bucket; i++)
 406     {
 407         if (!mf_read (cf->hash_mf, i, 0, 0, &p->ph))
 408         {
 409             logf (LOG_FATAL|LOG_ERRNO, "read bucket moveto flat");
 410             exit (1);
 411         }
 412         for (j = 0; j < HASH_BUCKET && p->ph.vno[j]; j++)
 413             cf_write_flat (cf, p->ph.no[j], p->ph.vno[j]);
 414     }
 415     xfree (p);
 416     xfree (cf->array);
 417     cf->array = NULL;
 418     xfree (cf->parray);
 419     cf->parray = NULL;
 420     cf->head.state = 2;
 421     cf->dirty = 1;
 422 }
 423
 424 static int cf_lookup (CFile cf, int no)
 425 {
 426     if (cf->head.state > 1)
 427         return cf_lookup_flat (cf, no);
 428     return cf_lookup_hash (cf, no);
 429 }
 430
 431 static int cf_new_flat (CFile cf, int no)
 432 {
 433     int vno = (cf->head.next_block)++;
 434
 435     cf_write_flat (cf, no, vno);
 436     return vno;
 437 }
 438
 439 static int cf_new_hash (CFile cf, int no)
 440 {
 441     int hno = cf_hash (cf, no);
 442     struct CFile_hash_bucket *hbprev = NULL, *hb = cf->parray[hno];
 443     int *bucketpp = &cf->array[hno];
 444     int i, vno = (cf->head.next_block)++;
 445
 446     for (hb = cf->parray[hno]; hb; hb = hb->h_next)
 447         if (!hb->ph.vno[HASH_BUCKET-1])
 448             for (i = 0; i<HASH_BUCKET; i++)
 449                 if (!hb->ph.vno[i])
 450                 {
 451                     (cf->no_hits)++;
 452                     hb->ph.no[i] = no;
 453                     hb->ph.vno[i] = vno;
 454                     hb->dirty = 1;
 455                     return vno;
 456                 }
 457
 458     while (*bucketpp)
 459     {
 460         for (hb = cf->parray[hno]; hb; hb = hb->h_next)
 461             if (hb->ph.this_bucket == *bucketpp)
 462             {
 463                 bucketpp = &hb->ph.next_bucket;
 464                 hbprev = hb;
 465                 break;
 466             }
 467         if (hb)
 468             continue;
 469
 470 #if 0
 471         /* extra check ... */
 472         for (hb = cf->bucket_lru_back; hb; hb = hb->lru_next)
 473         {
 474             if (hb->ph.this_bucket == *bucketpp)
 475             {
 476                 logf (LOG_FATAL, "Found hash bucket on other chain");
 477                 abort ();
 478             }
 479         }
 480 #endif
 481         (cf->no_miss)++;
 482         hb = get_bucket (cf, *bucketpp, hno);
 483         assert (hb);
 484         for (i = 0; i<HASH_BUCKET; i++)
 485             if (!hb->ph.vno[i])
 486             {
 487                 hb->ph.no[i] = no;
 488                 hb->ph.vno[i] = vno;
 489                 hb->dirty = 1;
 490                 return vno;
 491             }
 492         bucketpp = &hb->ph.next_bucket;
 493         hbprev = hb;
 494     }
 495     if (hbprev)
 496         hbprev->dirty = 1;
 497     hb = new_bucket (cf, bucketpp, hno);
 498     hb->ph.no[0] = no;
 499     hb->ph.vno[0] = vno;
 500     return vno;
 501 }
 502
 503 int cf_new (CFile cf, int no)
 504 {
 505     if (cf->head.state > 1)
 506         return cf_new_flat (cf, no);
 507     if (cf->no_miss*2 > cf->no_hits)
 508     {
 509         cf_moveto_flat (cf);
 510         assert (cf->head.state > 1);
 511         return cf_new_flat (cf, no);
 512     }
 513     return cf_new_hash (cf, no);
 514 }
 515
 516
 517 int cf_read (CFile cf, int no, int offset, int nbytes, void *buf)
 518 {
 519     int block;
 520
 521     assert (cf);
 522     zebra_mutex_lock (&cf->mutex);
 523     if (!(block = cf_lookup (cf, no)))
 524     {
 525         zebra_mutex_unlock (&cf->mutex);
 526         return -1;
 527     }
 528     zebra_mutex_unlock (&cf->mutex);
 529     if (!mf_read (cf->block_mf, block, offset, nbytes, buf))
 530     {
 531         logf (LOG_FATAL|LOG_ERRNO, "cf_read no=%d, block=%d", no, block);
 532         exit (1);
 533     }
 534     return 1;
 535 }
 536
 537 int cf_write (CFile cf, int no, int offset, int nbytes, const void *buf)
 538 {
 539     int block;
 540
 541     assert (cf);
 542     zebra_mutex_lock (&cf->mutex);
 543     if (!(block = cf_lookup (cf, no)))
 544     {
 545         block = cf_new (cf, no);
 546         if (offset || nbytes)
 547         {
 548             mf_read (cf->rmf, no, 0, 0, cf->iobuf);
 549             memcpy (cf->iobuf + offset, buf, nbytes);
 550             buf = cf->iobuf;
 551             offset = 0;
 552             nbytes = 0;
 553         }
 554     }
 555     zebra_mutex_unlock (&cf->mutex);
 556     if (mf_write (cf->block_mf, block, offset, nbytes, buf))
 557     {
 558         logf (LOG_FATAL|LOG_ERRNO, "cf_write no=%d, block=%d", no, block);
 559         exit (1);
 560     }
 561     return 0;
 562 }
 563
 564 int cf_close (CFile cf)
 565 {
 566     logf (LOG_DEBUG, "cf: close hits=%d miss=%d bucket_in_memory=%d total=%d",
 567           cf->no_hits, cf->no_miss, cf->bucket_in_memory,
 568           cf->head.next_bucket - cf->head.first_bucket);
 569     flush_bucket (cf, -1);
 570     if (cf->dirty)
 571     {
 572         mf_write (cf->hash_mf, 0, 0, sizeof(cf->head), &cf->head);
 573         write_head (cf);
 574     }
 575     mf_close (cf->hash_mf);
 576     mf_close (cf->block_mf);
 577     xfree (cf->array);
 578     xfree (cf->parray);
 579     xfree (cf->iobuf);
 580     zebra_mutex_destroy (&cf->mutex);
 581     xfree (cf);
 582     return 0;
 583 }
 584