X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fextract.c;h=619fdfdf267a3243bc6c404837ee7cecb79eb65a;hp=73fe1a1b46065f9d615abe4233a72f0f57e82c38;hb=519fefb91135ad52134b9fc4e82b3874f5525a2b;hpb=ee4ada9656faa612678aee238151a8b6652ad549 diff --git a/index/extract.c b/index/extract.c index 73fe1a1..619fdfd 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.123 2002-08-29 10:00:15 adam Exp $ +/* $Id: extract.c,v 1.126 2002-10-22 12:51:08 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -1222,7 +1222,14 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, if (!zh->reg->key_buf) { - int mem = 8*1024*1024; + int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8")); + if (mem <= 0) + { + logf(LOG_WARN, "Invalid memory setting, using default 8 MB"); + mem= 1024*1024*8; + } + /* FIXME: That "8" should be in a default settings include */ + /* not hard-coded here! -H */ zh->reg->key_buf = (char**) xmalloc (mem); zh->reg->ptr_top = mem/sizeof(char*); zh->reg->ptr_i = 0; @@ -1331,6 +1338,7 @@ void extract_flushWriteKeys (ZebraHandle zh) cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i]; if (strcmp (cp, prevcp)) { + encode_key_flush ( &encode_info, outf); encode_key_init (&encode_info); encode_key_write (cp, &encode_info, outf); prevcp = cp; @@ -1338,6 +1346,7 @@ void extract_flushWriteKeys (ZebraHandle zh) else encode_key_write (cp + strlen(cp), &encode_info, outf); } + encode_key_flush ( &encode_info, outf); #else qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_x_compare); extract_get_fname_tmp (out_fname, key_file_no); @@ -1369,6 +1378,7 @@ void extract_flushWriteKeys (ZebraHandle zh) cp = key_buf[ptr_top-ptr_i]; encode_key_write (cp+key_y_len, &encode_info, outf); } + encode_key_flush ( &encode_info, outf); if (!i) break; prevcp = key_buf[ptr_top-ptr_i]; @@ -1622,8 +1632,17 @@ void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, 
while (sk)
     {
         struct sortKey *sk_next = sk->next;
-        sortIdx_type (sortIdx, sk->attrUse);
-        sortIdx_add (sortIdx, sk->string, sk->length);
+        if (cmd == 1)
+        {
+            /* insert/update: store the sort key string for this attribute */
+            sortIdx_type (sortIdx, sk->attrUse);
+            sortIdx_add (sortIdx, sk->string, sk->length);
+        }
+        else if (cmd == 0)
+        {   /* delete: overwrite the entry with an empty string (length 1) */
+            sortIdx_type (sortIdx, sk->attrUse);
+            sortIdx_add (sortIdx, "", 1);
+        }
         xfree (sk->string);
         xfree (sk);
         sk = sk_next;
@@ -1636,6 +1655,10 @@ void encode_key_init (struct encode_info *i)
     i->sysno = 0;
     i->seqno = 0;
     i->cmd = -1;
+    i->prevsys=0;   /* 0 is tested later as "no key is buffered" */
+    i->prevseq=0;
+    i->prevcmd=-1;
+    i->keylen=0;    /* no key string held in i->buf yet */
 }
 
 char *encode_key_int (int d, char *bp)
@@ -1663,6 +1686,11 @@ char *encode_key_int (int d, char *bp)
     return bp;
 }
 
+#ifdef OLDENCODE
+/* This is the old encode_key_write.
+ * It may be deleted once we are confident that the new one works.
+ * HL 15-oct-2002
+ */
 void encode_key_write (char *k, struct encode_info *i, FILE *outf)
 {
     struct it_key key;
@@ -1689,3 +1717,126 @@ void encode_key_write (char *k, struct encode_info *i, FILE *outf)
     }
 }
 
+void encode_key_flush (struct encode_info *i, FILE *outf)
+{ /* no-op under OLDENCODE: empty body, so callers can call it in either build */
+}
+
+#else
+
+/* New encode_key_write.
+ * Holds back one entry (prevsys/prevseq/prevcmd) and compares it with the
+ * next one.  If the next entry has the same sysno and seqno but a different
+ * cmd, the pair cancels out and neither is written.  Otherwise the held
+ * entry is written to outf and the incoming one is held instead.  Intended
+ * to help updates with small modifications (appending to a mbox). */
+void encode_key_write (char *k, struct encode_info *i, FILE *outf)
+{
+    struct it_key key;
+    char *bp;
+
+    if (*k)  /* non-empty string: first entry for a new key, copy it into i->buf */
+    {
+        bp = i->buf;
+        while ((*bp++ = *k++))   /* copies the terminating NUL too; k ends just past it */
+            ;
+        i->keylen= bp - i->buf -1;   /* string length, not counting the NUL */
+    }
+    else
+    {
+        bp=i->buf + i->keylen;   /* continue after whatever key string is still in buf */
+        *bp++=0;
+        k++;                     /* step over the empty-string NUL in the input */
+    }
+
+    memcpy (&key, k+1, sizeof(struct it_key));   /* *k is the cmd byte, the it_key follows it */
+    if (0==i->prevsys) /* nothing buffered yet: just remember this entry */
+    {
+        i->prevsys=key.sysno;   /* NOTE(review): assumes a real sysno is never 0 - confirm */
+        i->prevseq=key.seqno;
+        i->prevcmd=*k;
+    }
+    else if ( (i->prevsys==key.sysno) &&
+              (i->prevseq==key.seqno) &&
+              (i->prevcmd!=*k) )
+    {  /* same sysno and seqno, different cmd: the two entries cancel out */
+        i->prevsys=0;   /* drop the buffered entry; the incoming one is not stored either */
+    }
+    else
+    { /* different entry: write the buffered one, then buffer the incoming one */
+        bp = encode_key_int ( (i->prevsys - i->sysno) * 2 + i->prevcmd, bp);
+        if (i->sysno != i->prevsys)
+        {
+            i->sysno = i->prevsys;
+            i->seqno = 0;   /* seqno delta restarts when sysno changes */
+        }
+        else if (!i->seqno && !i->prevseq && i->cmd == i->prevcmd)
+        {
+            return;  /* ??? Filters some sort of duplicates away */
+                     /* ??? Can this ever happen -H 15oct02; NOTE(review): returns before the incoming key is saved in prevsys/prevseq/prevcmd */
+        }
+        bp = encode_key_int (i->prevseq - i->seqno, bp);
+        i->seqno = i->prevseq;
+        i->cmd = i->prevcmd;
+        if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
+        {
+            logf (LOG_FATAL|LOG_ERRNO, "fwrite");
+            exit (1);
+        }
+        i->keylen=0; /* ok, it is written, forget the key string */
+        i->prevsys=key.sysno;   /* the incoming entry becomes the buffered one */
+        i->prevseq=key.seqno;
+        i->prevcmd=*k;
+    }
+#ifdef SKIPTHIS_OLDCODE
+    bp = encode_key_int ( (key.sysno - i->sysno) * 2 + *k, bp);
+    if (i->sysno != key.sysno)
+    {
+        i->sysno = key.sysno;
+        i->seqno = 0;
+    }
+    else if (!i->seqno && !key.seqno && i->cmd == *k)
+        return;
+    bp = encode_key_int (key.seqno - i->seqno, bp);
+    i->seqno = key.seqno;
+    i->cmd = *k;
+    if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
+    {
+        logf (LOG_FATAL|LOG_ERRNO, "fwrite");
+        exit (1);
+    }
+    i->keylen=0; /* ok, it is written, forget it */
+#endif
+}
+
+void encode_key_flush (struct encode_info *i, FILE *outf)
+{ /* write the entry still buffered in i (if any) to outf and clear the buffer state */
+    char *bp =i->buf + i->keylen;   /* position just after the pending key string, if any */
+    if (0==i->prevsys)
+    {
+        return; /* no entry is buffered, nothing to flush */
+    }
+    *bp++=0;
+    bp = encode_key_int ( (i->prevsys - i->sysno) * 2 + i->prevcmd, bp);
+    if (i->sysno != i->prevsys)
+    {
+        i->sysno = i->prevsys;
+        i->seqno = 0;   /* seqno delta restarts when sysno changes */
+    }
+    else if (!i->seqno && !i->prevseq && i->cmd == i->prevcmd)
+    {
+        return;  /* ??? Filters some sort of duplicates away */
+                 /* ??? Can this ever happen -H 15oct02; NOTE(review): this path leaves prevsys and keylen uncleared */
+    }
+    bp = encode_key_int (i->prevseq - i->seqno, bp);
+    i->seqno = i->prevseq;
+    i->cmd = i->prevcmd;
+    if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
+    {
+        logf (LOG_FATAL|LOG_ERRNO, "fwrite");
+        exit (1);
+    }
+    i->keylen=0; /* ok, it is written, forget the key string */
+    i->prevsys=0; /* forget the buffered values too */
+    i->prevseq=0;
+}
+#endif