-/* $Id: extract.c,v 1.158 2004-08-04 08:35:23 adam Exp $
+/* $Id: extract.c,v 1.157.2.1 2004-09-16 14:07:50 adam Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
02111-1307, USA.
*/
+
#include <stdio.h>
#include <assert.h>
#include <ctype.h>
struct recKeys *reckeys,
int attrSetS, int attrUseS)
{
-#if IT_KEY_NEW
-/* #error searchRecordKey does not work yet in this mode.. */
- static const char *ws[32];
- void *decode_handle = iscz1_start();
- int off = 0;
- int startSeq = -1;
- int seqno = 0;
- int i;
-
- for (i = 0; i<32; i++)
- ws[i] = NULL;
-
- while (off < reckeys->buf_used)
- {
- const char *src = reckeys->buf + off;
- struct it_key key;
- char *dst = (char*) &key;
- int attrSet, attrUse;
-
- iscz1_decode(decode_handle, &dst, &src);
- assert(key.len < 4 && key.len > 2);
-
- attrSet = key.mem[0];
- attrUse = key.mem[1];
- seqno = key.mem[2];
-
- if (attrUseS == attrUse && attrSetS == attrSet)
- {
- int woff;
-
- if (startSeq == -1)
- startSeq = seqno;
- woff = seqno - startSeq;
- if (woff >= 0 && woff < 31)
- ws[woff] = src;
- }
-
- while (*src++)
- ;
- off = src - reckeys->buf;
- }
- iscz1_stop(decode_handle);
- assert (off == reckeys->buf_used);
- return ws;
-#else
static const char *ws[32];
int off = 0;
int startSeq = -1;
}
assert (off == reckeys->buf_used);
return ws;
-#endif
}
struct file_read_info {
int recordOffset;
struct recordGroup *rGroup;
};
-
-void create_rec_keys_codec(struct recKeys *keys)
-{
- keys->buf_used = 0;
-#if IT_KEY_NEW
- iscz1_reset(keys->codec_handle);
-#else
- keys->prevAttrUse = -1;
- keys->prevAttrSet = -1;
- keys->prevSeqNo = 0;
-#endif
-}
static int file_extract_record(ZebraHandle zh,
SYSNO *sysno, const char *fname,
/* we are going to read from a file, so prepare the extraction */
int i;
- create_rec_keys_codec(&zh->reg->keys);
-
+ zh->reg->keys.buf_used = 0;
+ zh->reg->keys.prevAttrUse = -1;
+ zh->reg->keys.prevAttrSet = -1;
+ zh->reg->keys.prevSeqNo = 0;
zh->reg->sortKeys.buf_used = 0;
recordOffset = fi->file_moffset;
{
rinfo = dict_lookup (zh->reg->matchDict, matchStr);
if (rinfo)
- {
- assert(*rinfo == sizeof(*sysno));
memcpy (sysno, rinfo+1, sizeof(*sysno));
- }
}
else
{
int delete_flag,
int test_mode,
const char *recordType,
- SYSNO *sysno,
+ int *sysno,
const char *match_criteria,
const char *fname,
int force_update,
extractCtrl.endf = zebra_record_int_end;
extractCtrl.fh = &fc;
- create_rec_keys_codec(&zh->reg->keys);
-
+ zh->reg->keys.buf_used = 0;
+ zh->reg->keys.prevAttrUse = -1;
+ zh->reg->keys.prevAttrSet = -1;
+ zh->reg->keys.prevSeqNo = 0;
zh->reg->sortKeys.buf_used = 0;
if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0]))
if (matchStr) {
rinfo = dict_lookup (zh->reg->matchDict, matchStr);
if (rinfo)
- {
- assert(*rinfo == sizeof(*sysno));
memcpy (sysno, rinfo+1, sizeof(*sysno));
- }
}
}
abort ();
}
- create_rec_keys_codec(&zh->reg->keys);
-
+ zh->reg->keys.buf_used = 0;
+ zh->reg->keys.prevAttrUse = -1;
+ zh->reg->keys.prevAttrSet = -1;
+ zh->reg->keys.prevSeqNo = 0;
zh->reg->sortKeys.buf_used = 0;
extractCtrl.init = extract_init;
void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
int cmd, struct recKeys *reckeys)
{
-#if IT_KEY_NEW
- void *decode_handle = iscz1_start();
-#else
- int seqno = 0;
#if SU_SCHEME
#else
unsigned char attrSet = (unsigned char) -1;
unsigned short attrUse = (unsigned short) -1;
#endif
-#endif
+ int seqno = 0;
int off = 0;
int ch = 0;
ZebraExplainInfo zei = zh->reg->zei;
zh->reg->key_file_no = 0;
}
zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1);
-#if IT_KEY_NEW
- while (off < reckeys->buf_used)
- {
- const char *src = reckeys->buf + off;
- struct it_key key;
- char *dst = (char*) &key;
- int attrSet, attrUse;
-
- iscz1_decode(decode_handle, &dst, &src);
- assert(key.len < 4 && key.len > 2);
-
- attrSet = key.mem[0];
- attrUse = key.mem[1]; /* sequence in mem[2] */
-
- if (zh->reg->key_buf_used + 1024 >
- (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*))
- extract_flushWriteKeys (zh,0);
- assert(zh->reg->ptr_i >= 0);
- ++(zh->reg->ptr_i);
- assert(zh->reg->ptr_i > 0);
- (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] =
- (char*)zh->reg->key_buf + zh->reg->key_buf_used;
-
- ch = zebraExplain_lookupSU (zei, attrSet, attrUse);
- if (ch < 0)
- ch = zebraExplain_addSU (zei, attrSet, attrUse);
-
- assert (ch > 0);
- zh->reg->key_buf_used +=
- key_SU_encode (ch,((char*)zh->reg->key_buf) +
- zh->reg->key_buf_used);
- while (*src)
- ((char*)zh->reg->key_buf) [(zh->reg->key_buf_used)++] = *src++;
- src++;
- ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = '\0';
- ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = cmd;
-
- key.len = 2;
- key.mem[0] = sysno;
- key.mem[1] = key.mem[2]; /* sequence .. */
-
- memcpy ((char*)zh->reg->key_buf + zh->reg->key_buf_used,
- &key, sizeof(key));
- (zh->reg->key_buf_used) += sizeof(key);
- off = src - reckeys->buf;
- }
-#else
while (off < reckeys->buf_used)
{
const char *src = reckeys->buf + off;
(zh->reg->key_buf_used) += sizeof(key);
off = src - reckeys->buf;
}
-#endif
assert (off == reckeys->buf_used);
-#if IT_KEY_NEW
- iscz1_stop(decode_handle);
-#endif
}
void extract_flushWriteKeys (ZebraHandle zh, int final)
zh->reg->key_buf_used = 0;
}
-void extract_add_index_string (RecWord *p, const char *str, int length)
+void extract_add_index_string (RecWord *p, const char *string,
+ int length)
{
char *dst;
- ZebraHandle zh = p->extractCtrl->handle;
- struct recKeys *keys = &zh->reg->keys;
-#if IT_KEY_NEW
- struct it_key key;
- const char *src = (char*) &key;
-#else
unsigned char attrSet;
unsigned short attrUse;
int lead = 0;
int diff = 0;
int *pseqno = &p->seqno;
+ ZebraHandle zh = p->extractCtrl->handle;
ZebraExplainInfo zei = zh->reg->zei;
-#endif
+ struct recKeys *keys = &zh->reg->keys;
if (keys->buf_used+1024 > keys->buf_max)
{
}
dst = keys->buf + keys->buf_used;
-#if IT_KEY_NEW
- key.len = 3;
- key.mem[0] = p->attrSet;
- key.mem[1] = p->attrUse;
- key.mem[2] = p->seqno;
-
-#if 0
- /* just for debugging .. */
- yaz_log(LOG_LOG, "set=%d use=%d seqno=%d", p->attrSet, p->attrUse,
- p->seqno);
-#endif
-
- iscz1_encode(keys->codec_handle, &dst, &src);
-
- *dst++ = p->reg_type;
- memcpy (dst, str, length);
- dst += length;
- *dst++ = '\0';
-#else
/* leader byte is encoded as follows:
bit 0 : 1 if attrset is unchanged; 0 if attrset is changed
bit 1 : 1 if attruse is unchanged; 0 if attruse is changed
lead |= 2;
else
keys->prevAttrUse = attrUse;
-
+#if 1
diff = 1 + *pseqno - keys->prevSeqNo;
if (diff >= 1 && diff <= 15)
lead |= (diff << 2);
else
diff = 0;
-
+#endif
keys->prevSeqNo = *pseqno;
*dst++ = lead;
}
#endif
*dst++ = p->reg_type;
- memcpy (dst, str, length);
+ memcpy (dst, string, length);
dst += length;
*dst++ = '\0';
memcpy (dst, pseqno, sizeof(*pseqno));
dst += sizeof(*pseqno);
}
-#endif
keys->buf_used = dst - keys->buf;
}
-static void extract_add_sort_string (RecWord *p, const char *str,
+static void extract_add_sort_string (RecWord *p, const char *string,
int length)
{
ZebraHandle zh = p->extractCtrl->handle;
off += key_SU_encode(p->attrSet, sk->buf + off);
off += key_SU_encode(p->attrUse, sk->buf + off);
off += key_SU_encode(length, sk->buf + off);
- memcpy (sk->buf + off, str, length);
+ memcpy (sk->buf + off, string, length);
sk->buf_used = off + length;
}
const char **map = 0;
if (remain > 0)
- map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
+ map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, 0);
while (map)
{
{
remain = p->length - (b - p->string);
if (remain > 0)
- map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
+ map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, 0);
else
map = 0;
}
buf[i++] = *(cp++);
remain = p->length - (b - p->string);
if (remain > 0)
- map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
+ map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, 0);
else
map = 0;
}
char buf[IT_MAX_WORD+1];
const char **map = 0;
int i = 0, remain = p->length;
+ int first; /* first position */
+
+yaz_log(LOG_DEBUG, "Complete field, w='%.*s'", p->length, p->string); /* field is bounded by p->length, not assumed NUL-terminated */
if (remain > 0)
- map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, remain);
+ map = zebra_maps_input (p->zebra_maps, p->reg_type, &b, remain, 1);
while (remain > 0 && i < IT_MAX_WORD)
{
remain = p->length - (b - p->string);
if (remain > 0)
- map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain);
+ {
+ first = i ? 0 : 1;
+ map = zebra_maps_input(p->zebra_maps, p->reg_type, &b, remain, first);
+ }
else
map = 0;
}
{
if (i >= IT_MAX_WORD)
break;
+yaz_log(LOG_DEBUG, "Adding string to index '%s'", *map); /* *map is a const char *, so it must be printed with %s */
while (i < IT_MAX_WORD && *cp)
buf[i++] = *(cp++);
}
remain = p->length - (b - p->string);
if (remain > 0)
+ {
map = zebra_maps_input (p->zebra_maps, p->reg_type, &b,
- remain);
+ remain, 0);
+ }
else
map = 0;
}
i->prevseq=0;
i->prevcmd=-1;
i->keylen=0;
-#if IT_KEY_NEW
- i->encode_handle = iscz1_start();
-#endif
}
-#if IT_KEY_NEW
-#else
char *encode_key_int (int d, char *bp)
{
if (d <= 63)
}
return bp;
}
-#endif
-
#define OLDENCODE 1
#ifdef OLDENCODE
void encode_key_write (char *k, struct encode_info *i, FILE *outf)
{
struct it_key key;
- char *bp = i->buf, *bp0;
- const char *src = (char *) &key;
+ char *bp = i->buf;
- /* copy term to output buf */
while ((*bp++ = *k++))
;
- /* and copy & align key so we can mangle */
- memcpy (&key, k+1, sizeof(struct it_key)); /* *k is insert/delete */
-#if IT_KEY_NEW
- bp0 = bp++;
- iscz1_encode(i->encode_handle, &bp, &src);
- *bp0 = (*k * 128) + bp - bp0 - 1; /* length and insert/delete combined */
-#else
+ memcpy (&key, k+1, sizeof(struct it_key));
bp = encode_key_int ( (key.sysno - i->sysno) * 2 + *k, bp);
if (i->sysno != key.sysno)
{
bp = encode_key_int (key.seqno - i->seqno, bp);
i->seqno = key.seqno;
i->cmd = *k;
-#endif
if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
{
logf (LOG_FATAL|LOG_ERRNO, "fwrite");
void encode_key_flush (struct encode_info *i, FILE *outf)
{ /* dummy routine */
-#if IT_KEY_NEW
- iscz1_stop(i->encode_handle);
-#endif
}
#else