-/* $Id: kinput.c,v 1.65 2005-04-13 13:03:47 adam Exp $
+/* $Id: kinput.c,v 1.75 2006-05-17 17:46:45 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
#include <fcntl.h>
#ifdef WIN32
#include <io.h>
-#else
+#endif
+#if HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <stdlib.h>
#define INP_BUF_START 60000
#define INP_BUF_ADD 400000
-
struct key_file {
int no; /* file no */
off_t offset; /* file offset */
Res res;
};
+#if 0
+/* Debug aid (compiled out): dump one key to the log via key_logdump_txt,
+ tagged "i" when mode is non-zero and "d" otherwise. */
+static void pkey(const char *b, int mode)
+{
+ const char *tag = "d";
+
+ if (mode)
+ tag = "i";
+ key_logdump_txt(YLOG_LOG, b, tag);
+}
+#endif
+
+
void getFnameTmp (Res res, char *fname, int no)
{
const char *pre;
srcbuf[j] = key_file_getc(f);
dst = key + i;
iscz1_decode(f->decode_handle, &dst, &src);
+
+#if 0
+ /* debugging */
+ if (1)
+ {
+ struct it_key k;
+ memcpy(&k, key+i, sizeof(k));
+ if (!k.mem[1])
+ yaz_log(YLOG_LOG, "00 KEY");
+ }
+#endif
return i + sizeof(struct it_key);
}
int *ptr;
int (*cmp)(const void *p1, const void *p2);
struct zebra_register *reg;
- ZebraHandle zh; /* only used for raw reading that bypasses the heaps */
+ ZebraHandle zh;
+ int raw_reading; /* 1=raw /mem read. 0=file reading */
int no_diffs;
int no_updates;
int no_deletions;
hi->info.buf = 0;
hi->heapnum = 0;
hi->ptr = 0;
- hi->zh=0;
+ hi->raw_reading = 0;
hi->no_diffs = 0;
hi->no_diffs = 0;
hi->no_updates = 0;
return hi;
}
-struct heap_info *key_heap_init (int nkeys,
- int (*cmp)(const void *p1, const void *p2))
+struct heap_info *key_heap_init_file(ZebraHandle zh,
+ int nkeys,
+ int (*cmp)(const void *p1, const void *p2))
{
struct heap_info *hi;
int i;
hi = key_heap_malloc();
+ hi->zh = zh;
hi->info.file = (struct key_file **)
xmalloc (sizeof(*hi->info.file) * (1+nkeys));
hi->info.buf = (char **) xmalloc (sizeof(*hi->info.buf) * (1+nkeys));
return hi;
}
/*
 * key_heap_init_raw (renamed from key_heap_init_buff by this change):
 * build a heap_info used for raw reading.
 *   zh  - handle stored in hi->zh
 *   cmp - comparison callback stored in hi->cmp
 * Returns the object obtained from key_heap_malloc() with raw_reading
 * set to 1.
 */
-struct heap_info *key_heap_init_buff ( ZebraHandle zh,
- int (*cmp)(const void *p1, const void *p2))
+struct heap_info *key_heap_init_raw(ZebraHandle zh,
+ int (*cmp)(const void *p1, const void *p2))
{
struct heap_info *hi=key_heap_malloc();
- hi->cmp=cmp;
- hi->zh=zh;
+ hi->cmp = cmp;
+ hi->zh = zh;
/* raw_reading is the mode flag tested by the reader and by
   key_heap_destroy; it replaces the earlier "hi->zh != 0" test */
+ hi->raw_reading = 1;
return hi;
}
void key_heap_destroy (struct heap_info *hi, int nkeys)
{
int i;
- yaz_log (YLOG_DEBUG, "key_heap_destroy");
- yaz_log (YLOG_DEBUG, "key_heap_destroy nk=%d",nkeys);
- if (!hi->zh)
+ if (!hi->raw_reading)
for (i = 0; i<=nkeys; i++)
xfree (hi->info.buf[i]);
}
}
-static int heap_read_one_raw (struct heap_info *hi, char *name, char *key)
+static int heap_read_one_raw(struct heap_info *hi, char *name, char *key)
{
- ZebraHandle zh=hi->zh;
+ ZebraHandle zh = hi->zh;
size_t ptr_i = zh->reg->ptr_i;
char *cp;
if (!ptr_i)
return 0;
--(zh->reg->ptr_i);
cp=(zh->reg->key_buf)[zh->reg->ptr_top - ptr_i];
- yaz_log (YLOG_DEBUG, " raw: i=%ld top=%ld cp=%p", (long) ptr_i,
- (long) zh->reg->ptr_top, cp);
strcpy(name, cp);
memcpy(key, cp+strlen(name)+1, KEY_SIZE);
hi->no_iterations++;
char rbuf[INP_NAME_MAX];
struct key_file *kf;
- if (hi->zh) /* bypass the heap stuff, we have a readymade buffer */
+ if (hi->raw_reading)
return heap_read_one_raw(hi, name, key);
if (!hi->heapnum)
return 1;
}
-#define PR_KEY 0
+#define PR_KEY_LOW 0
+#define PR_KEY_TOP 0
/*
 * This hunk drops the old printf-based pkey() and puts a compiled-out
 * (#if 0) print_dict_item() in its place.
 *
 * print_dict_item: log one dictionary entry.
 *   zh - handle; when it is 0 only the decoded ordinal is logged
 *   s  - entry text; key_SU_decode() reads the ordinal from its start and
 *        the returned length is used as the offset of the term within s
 */
-#if PR_KEY
-static void pkey(const char *b, int mode)
+#if 0
+/* for debugging only */
+static void print_dict_item(ZebraHandle zh, const char *s)
{
- struct it_key *key = (struct it_key *) b;
- printf ("%c %d:%d\n", mode + 48, key->sysno, key->seqno);
+ char dst[IT_MAX_WORD+1];
+ int ord;
+ int len = key_SU_decode(&ord, (const unsigned char *) s);
+ int index_type;
+ const char *db = 0;
+
+ if (!zh)
+ yaz_log(YLOG_LOG, "ord=%d", ord);
+ else
+ {
/* NOTE(review): the result of zebraExplain_lookup_ord is not checked and
   index_type has no initial value; if the lookup can fail without
   storing to it, the call below reads an indeterminate value - confirm. */
+ zebraExplain_lookup_ord (zh->reg->zei,
+ ord, &index_type, &db, 0, 0, 0);
+
/* convert the term that follows the ordinal into dst for logging */
+ zebra_term_untrans(zh, index_type, dst, s + len);
+
+ yaz_log(YLOG_LOG, "ord=%d term=%s", ord, dst);
+ }
}
#endif
int first_in_list;
int more;
int ret;
+ int look_level;
};
static int heap_cread_item (void *vp, char **dst, int *insertMode);
-int heap_cread_item2 (void *vp, char **dst, int *insertMode)
+int heap_cread_item2(void *vp, char **dst, int *insertMode)
{
struct heap_cread_info *p = (struct heap_cread_info *) vp;
int level = 0;
+ if (p->look_level)
+ {
+ if (p->look_level > 0)
+ {
+ *insertMode = 1;
+ p->look_level--;
+ }
+ else
+ {
+ *insertMode = 0;
+ p->look_level++;
+ }
+ memcpy (*dst, p->key_1, p->sz_1);
+#if 0
+ yaz_log(YLOG_LOG, "DUP level=%d", p->look_level);
+ pkey(*dst, *insertMode);
+#endif
+ (*dst) += p->sz_1;
+ return 1;
+ }
if (p->ret == 0) /* lookahead was 0?. Return that in read next round */
{
p->ret = -1;
return 0;
}
p->sz_2 = dst_2 - p->key_2;
- if (p->sz_1 == p->sz_2 && memcmp(p->key_1, p->key_2, p->sz_1) == 0)
+
+ if (key_compare(p->key_1, p->key_2) == 0)
{
if (p->mode_2) /* adjust level according to deletes/inserts */
level++;
}
/* outcome is insert (1) or delete (0) depending on final level */
if (level > 0)
+ {
*insertMode = 1;
+ level--;
+ }
else
+ {
*insertMode = 0;
+ level++;
+ }
+ p->look_level = level;
memcpy (*dst, p->key_1, p->sz_1);
-#if PR_KEY
- printf ("top: ");
- pkey(*dst, *insertMode); fflush(stdout);
+#if 0
+ pkey(*dst, *insertMode);
#endif
(*dst) += p->sz_1;
return 1;
{
*insertMode = p->key[0];
memcpy (*dst, p->key+1, sizeof(struct it_key));
-#if PR_KEY
- printf ("sub1: ");
+#if PR_KEY_LOW
pkey(*dst, *insertMode);
#endif
(*dst) += sizeof(struct it_key);
}
*insertMode = p->key[0];
memcpy (*dst, p->key+1, sizeof(struct it_key));
-#if PR_KEY
- printf ("sub2: ");
+#if PR_KEY_LOW
pkey(*dst, *insertMode);
#endif
(*dst) += sizeof(struct it_key);
return 1;
}
/*
 * heap_inpc: feed the key stream to isamc_merge and update the dictionary.
 *   hci - reader state supplied by the caller; with this change the key
 *         buffers are no longer allocated or freed in this function
 *   hi  - heap whose reg->dict / reg->isamc are used and whose counters
 *         are updated
 * Loops while hci->more is set; always returns 0.
 */
-int heap_inpc (struct heap_info *hi)
+int heap_inpc (struct heap_cread_info *hci, struct heap_info *hi)
{
- struct heap_cread_info hci;
ISAMC_I *isamc_i = (ISAMC_I *) xmalloc (sizeof(*isamc_i));
- hci.key = (char *) xmalloc (KEY_SIZE);
- hci.key_1 = (char *) xmalloc (KEY_SIZE);
- hci.key_2 = (char *) xmalloc (KEY_SIZE);
- hci.ret = -1;
- hci.first_in_list = 1;
- hci.hi = hi;
- hci.more = heap_read_one (hi, hci.cur_name, hci.key);
-
- isamc_i->clientData = &hci;
/* the merge pulls items through heap_cread_item2, with hci as its state */
+ isamc_i->clientData = hci;
isamc_i->read_item = heap_cread_item2;
- while (hci.more)
+ while (hci->more)
{
char this_name[INP_NAME_MAX];
ISAM_P isamc_p, isamc_p2;
char *dict_info;
/* keep a private copy of the term name for the dictionary update below */
- strcpy (this_name, hci.cur_name);
- assert (hci.cur_name[1]);
+ strcpy (this_name, hci->cur_name);
/* the first byte of the name must be non-zero (byte 1 was checked before) */
+ assert (hci->cur_name[0]);
hi->no_diffs++;
- if ((dict_info = dict_lookup (hi->reg->dict, hci.cur_name)))
+ if ((dict_info = dict_lookup (hi->reg->dict, hci->cur_name)))
{
memcpy (&isamc_p, dict_info+1, sizeof(ISAM_P));
isamc_p2 = isamc_p;
/* NOTE(review): isamc_p2 is never read in this listing and isamc_p is
   overwritten right after being loaded; the text appears to be stitched
   from patch hunks with lines elided at this point - confirm against the
   full file before editing this branch. */
isamc_p = 0;
isamc_merge (hi->reg->isamc, &isamc_p, isamc_i);
hi->no_insertions++;
/* only store a dictionary entry when the merge left a non-zero position */
- dict_insert (hi->reg->dict, this_name, sizeof(ISAM_P), &isamc_p);
+ if (isamc_p)
+ dict_insert (hi->reg->dict, this_name,
+ sizeof(ISAM_P), &isamc_p);
}
}
xfree (isamc_i);
- xfree (hci.key);
- xfree (hci.key_1);
- xfree (hci.key_2);
return 0;
}
-#if 0
-/* for debugging only */
-static void print_dict_item (ZebraMaps zm, const char *s)
+/*
+ * heap_inp0: read and discard every item of every term.
+ * hci - reader state supplied by the caller
+ * hi - heap info; no_diffs is incremented once per term
+ * Nothing is merged or written to the dictionary here; items are only
+ * pulled through heap_cread_item2. Always returns 0.
+ */
+int heap_inp0(struct heap_cread_info *hci, struct heap_info *hi)
{
- int reg_type = s[1];
- char keybuf[IT_MAX_WORD+1];
- char *to = keybuf;
- const char *from = s + 2;
-
- while (*from)
+ while (hci->more)
{
- const char *res = zebra_maps_output (zm, reg_type, &from);
- if (!res)
- *to++ = *from++;
- else
- while (*res)
- *to++ = *res++;
+ char mybuf[1024];
+ char *dst = mybuf;
+ int mode;
+
+ assert (hci->cur_name[0]);
+ hi->no_diffs++;
+
+ /* heap_cread_item2 copies an item to *dst and advances dst past it.
+ Rewind after every item so a term with many items cannot run
+ off the end of mybuf. */
+ while (heap_cread_item2(hci, &dst, &mode))
+ dst = mybuf;
}
- *to = '\0';
- yaz_log (YLOG_LOG, "%s", keybuf);
-}
-#endif
+ return 0;
+}
/*
 * heap_inpb: feed the key stream to isamb_merge and update the dictionary.
 *   hci - reader state supplied by the caller; with this change the key
 *         buffers are no longer allocated or freed in this function
 *   hi  - heap whose reg->dict / reg->isamb are used and whose counters
 *         are updated
 * Loops while hci->more is set; always returns 0.
 */
-int heap_inpb (struct heap_info *hi)
+
+int heap_inpb(struct heap_cread_info *hci, struct heap_info *hi)
{
- struct heap_cread_info hci;
ISAMC_I *isamc_i = (ISAMC_I *) xmalloc (sizeof(*isamc_i));
- hci.key = (char *) xmalloc (KEY_SIZE);
- hci.key_1 = (char *) xmalloc (KEY_SIZE);
- hci.key_2 = (char *) xmalloc (KEY_SIZE);
- hci.ret = -1;
- hci.first_in_list = 1;
- hci.hi = hi;
- hci.more = heap_read_one (hi, hci.cur_name, hci.key);
-
- isamc_i->clientData = &hci;
/* the merge pulls items through heap_cread_item2, with hci as its state */
+ isamc_i->clientData = hci;
isamc_i->read_item = heap_cread_item2;
- while (hci.more)
+ while (hci->more)
{
char this_name[INP_NAME_MAX];
ISAM_P isamc_p, isamc_p2;
char *dict_info;
/* keep a private copy of the term name for the dictionary update below */
- strcpy (this_name, hci.cur_name);
- assert (hci.cur_name[1]);
+ strcpy (this_name, hci->cur_name);
/* the first byte of the name must be non-zero (byte 1 was checked before) */
+ assert (hci->cur_name[0]);
hi->no_diffs++;
/* optional trace of the current term; compiled out */
#if 0
- print_dict_item (hi->reg->zebra_maps, hci.cur_name);
+ assert(hi->zh);
+ print_dict_item(hi->zh, hci->cur_name);
#endif
- if ((dict_info = dict_lookup (hi->reg->dict, hci.cur_name)))
+ if ((dict_info = dict_lookup (hi->reg->dict, hci->cur_name)))
{
memcpy (&isamc_p, dict_info+1, sizeof(ISAM_P));
isamc_p2 = isamc_p;
/* NOTE(review): isamc_p2 is never read in this listing and isamc_p is
   overwritten right after being loaded; the text appears to be stitched
   from patch hunks with lines elided at this point - confirm against the
   full file before editing this branch. */
isamc_p = 0;
isamb_merge (hi->reg->isamb, &isamc_p, isamc_i);
hi->no_insertions++;
/* only store a dictionary entry when the merge left a non-zero position */
- dict_insert (hi->reg->dict, this_name, sizeof(ISAM_P), &isamc_p);
+ if (isamc_p)
+ dict_insert (hi->reg->dict, this_name,
+ sizeof(ISAM_P), &isamc_p);
}
}
- xfree (isamc_i);
- xfree (hci.key);
- xfree (hci.key_1);
- xfree (hci.key_2);
+ xfree(isamc_i);
return 0;
}
-int heap_inps (struct heap_info *hi)
+int heap_inps (struct heap_cread_info *hci, struct heap_info *hi)
{
- struct heap_cread_info hci;
ISAMS_I isams_i = (ISAMS_I) xmalloc (sizeof(*isams_i));
- hci.key = (char *) xmalloc (KEY_SIZE);
- hci.key_1 = (char *) xmalloc (KEY_SIZE);
- hci.key_2 = (char *) xmalloc (KEY_SIZE);
- hci.first_in_list = 1;
- hci.ret = -1;
- hci.hi = hi;
- hci.more = heap_read_one (hi, hci.cur_name, hci.key);
-
- isams_i->clientData = &hci;
+ isams_i->clientData = hci;
isams_i->read_item = heap_cread_item;
- while (hci.more)
+ while (hci->more)
{
char this_name[INP_NAME_MAX];
ISAM_P isams_p;
char *dict_info;
- strcpy (this_name, hci.cur_name);
- assert (hci.cur_name[1]);
+ strcpy (this_name, hci->cur_name);
+ assert (hci->cur_name[0]);
hi->no_diffs++;
- if (!(dict_info = dict_lookup (hi->reg->dict, hci.cur_name)))
+ if (!(dict_info = dict_lookup (hi->reg->dict, hci->cur_name)))
{
isams_p = isams_merge (hi->reg->isams, isams_i);
hi->no_insertions++;
struct progressInfo progressInfo;
int nkeys = zh->reg->key_file_no;
int usefile;
-
yaz_log (YLOG_DEBUG, " index_merge called with nk=%d b=%p",
nkeys, zh->reg->key_buf);
if ( (nkeys==0) && (zh->reg->key_buf==0) )
progressInfo.totalBytes += kf[i]->length;
progressInfo.totalOffset += kf[i]->buf_size;
}
- hi = key_heap_init (nkeys, key_qsort_compare);
+ hi = key_heap_init_file(zh, nkeys, key_qsort_compare);
hi->reg = zh->reg;
for (i = 1; i<=nkeys; i++)
} /* use file */
else
{ /* do not use file, read straight from buffer */
- hi = key_heap_init_buff (zh,key_qsort_compare);
+ hi = key_heap_init_raw(zh, key_qsort_compare);
hi->reg = zh->reg;
}
- if (zh->reg->isams)
- heap_inps (hi);
- if (zh->reg->isamc)
- heap_inpc (hi);
- if (zh->reg->isamb)
- heap_inpb (hi);
+
+ if (1)
+ {
+ struct heap_cread_info hci;
+
+ hci.key = (char *) xmalloc (KEY_SIZE);
+ hci.key_1 = (char *) xmalloc (KEY_SIZE);
+ hci.key_2 = (char *) xmalloc (KEY_SIZE);
+ hci.ret = -1;
+ hci.first_in_list = 1;
+ hci.hi = hi;
+ hci.look_level = 0;
+ hci.more = heap_read_one (hi, hci.cur_name, hci.key);
+
+ if (zh->reg->isams)
+ heap_inps(&hci, hi);
+ if (zh->reg->isamc)
+ heap_inpc(&hci, hi);
+ if (zh->reg->isamb)
+ heap_inpb(&hci, hi);
+
+ xfree (hci.key);
+ xfree (hci.key_1);
+ xfree (hci.key_2);
+ }
if (usefile)
{
key_heap_destroy (hi, nkeys);
}
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+