-/* $Id: extract.c,v 1.238 2006-11-20 13:59:13 adam Exp $
- Copyright (C) 1995-2006
+/* $Id: extract.c,v 1.251 2007-03-13 13:46:11 adam Exp $
+ Copyright (C) 1995-2007
Index Data ApS
This file is part of the Zebra server.
#include <direntz.h>
#include <charmap.h>
-#define ENCODE_BUFLEN 768
-struct encode_info {
- void *encode_handle;
- void *decode_handle;
- char buf[ENCODE_BUFLEN];
-};
-
static int log_level_extract = 0;
static int log_level_details = 0;
static int log_level_initialized = 0;
}
}
-static void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
- int cmd, zebra_rec_keys_t reckeys,
- zint staticrank);
-static void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno,
- int cmd, zebra_rec_keys_t skp);
+static void extract_flush_record_keys(ZebraHandle zh, zint sysno,
+ int cmd, zebra_rec_keys_t reckeys,
+ zint staticrank);
+static void extract_flush_sort_keys(ZebraHandle zh, zint sysno,
+ int cmd, zebra_rec_keys_t skp);
static void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid);
static void extract_token_add (RecWord *p);
-static void encode_key_init (struct encode_info *i);
-static void encode_key_write (char *k, struct encode_info *i, FILE *outf);
-static void encode_key_flush (struct encode_info *i, FILE *outf);
-
-#define USE_SHELLSORT 0
-
-#if USE_SHELLSORT
-static void shellsort(void *ar, int r, size_t s,
- int (*cmp)(const void *a, const void *b))
+static void check_log_limit(ZebraHandle zh) /* Emits one log notice at the
+ * moment zh->records_processed + zh->records_skipped is exactly equal to
+ * zh->m_file_verbose_limit. The equality test (rather than >=) means the
+ * notice is written only when the sum hits the limit, not on every later
+ * call. Per-file progress lines elsewhere in this file are gated on the
+ * same sum being below the limit. */
{
- char *a = ar;
- char v[100];
- int h, i, j, k;
- static const int incs[16] = { 1391376, 463792, 198768, 86961, 33936,
- 13776, 4592, 1968, 861, 336,
- 112, 48, 21, 7, 3, 1 };
- for ( k = 0; k < 16; k++)
- for (h = incs[k], i = h; i < r; i++)
- {
- memcpy (v, a+s*i, s);
- j = i;
- while (j > h && (*cmp)(a + s*(j-h), v) > 0)
- {
- memcpy (a + s*j, a + s*(j-h), s);
- j -= h;
- }
- memcpy (a+s*j, v, s);
- }
+ if (zh->records_processed + zh->records_skipped == zh->m_file_verbose_limit)
+ {
+ yaz_log(YLOG_LOG, "More than %d file log entries. Omitting rest",
+ zh->m_file_verbose_limit);
+ }
}
-#endif
static void logRecord (ZebraHandle zh)
{
+ check_log_limit(zh);
++zh->records_processed;
if (!(zh->records_processed % 1000))
{
#define FILE_MATCH_BLANK "\t "
-static char *fileMatchStr (ZebraHandle zh,
- zebra_rec_keys_t reckeys,
- const char *fname, const char *spec)
+static char *get_match_from_spec(ZebraHandle zh,
+ zebra_rec_keys_t reckeys,
+ const char *fname, const char *spec)
{
static char dstBuf[2048]; /* static here ??? */
char *dst = dstBuf;
"", 0);
}
-ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname,
+ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh,
+ struct ZebraRecStream *stream,
+ int delete_flag,
+ int test_mode,
+ const char *recordType,
+ zint *sysno,
+ const char *match_criteria,
+ const char *fname,
+ int force_update,
+ int allow_update,
+ RecType recType,
+ void *recTypeClientData);
+
+
+ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname,
int deleteFlag)
{
ZEBRA_RES r = ZEBRA_OK;
}
if (!zh->m_record_type)
{
- if (zh->records_processed < zh->m_file_verbose_limit)
+ check_log_limit(zh);
+ if (zh->records_processed + zh->records_skipped
+ < zh->m_file_verbose_limit)
yaz_log (YLOG_LOG, "? %s", fname);
+ zh->records_skipped++;
return 0;
}
/* determine match criteria */
streamp = &stream;
zebra_create_stream_fd(streamp, fd, 0);
}
- while(1)
- {
- r = zebra_extract_record_stream(zh, streamp,
- deleteFlag,
- 0, /* tst_mode */
- zh->m_record_type,
- sysno,
- 0, /*match_criteria */
- fname,
- 1, /* force_update */
- 1, /* allow_update */
- recType, recTypeClientData);
- if (r != ZEBRA_OK)
- {
- break;
- }
- if (sysno)
- {
- break;
- }
- }
+ r = zebra_extract_records_stream(zh, streamp,
+ deleteFlag,
+ 0, /* tst_mode */
+ zh->m_record_type,
+ sysno,
+ 0, /*match_criteria */
+ fname,
+ 1, /* force_update */
+ 1, /* allow_update */
+ recType, recTypeClientData);
if (streamp)
stream.destroy(streamp);
zh->m_record_type = original_record_type;
int delete_flag,
int test_mode,
const char *recordType,
- SYSNO *sysno,
+ zint *sysno,
const char *match_criteria,
const char *fname,
int force_update,
return ZEBRA_FAIL;
}
-
-
zebra_create_stream_mem(&stream, buf, buf_size);
- res = zebra_extract_record_stream(zh, &stream,
- delete_flag,
- test_mode,
- recordType,
- sysno,
- match_criteria,
- fname,
- force_update,
- allow_update,
- recType, clientData);
+ res = zebra_extract_records_stream(zh, &stream,
+ delete_flag,
+ test_mode,
+ recordType,
+ sysno,
+ match_criteria,
+ fname,
+ force_update,
+ allow_update,
+ recType, clientData);
stream.destroy(&stream);
return res;
}
+ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh,
+ struct ZebraRecStream *stream,
+ int delete_flag,
+ int test_mode,
+ const char *recordType,
+ zint *sysno,
+ const char *match_criteria,
+ const char *fname,
+ int force_update,
+ int allow_update,
+ RecType recType,
+ void *recTypeClientData)
+{ /* Processes the records on `stream` by calling
+ * zebra_extract_record_stream() in a loop; every argument is forwarded
+ * unchanged. Each iteration resets `more` to 0 and lets the callee set it.
+ * The loop ends when one of the following holds, checked in this order:
+ *  - the callee left `more` at 0: the result is forced to ZEBRA_OK.
+ *    NOTE(review): this also overwrites a ZEBRA_FAIL that was returned
+ *    together with more == 0; in the visible callee that covers the EOF
+ *    case and the extract-error returns. Confirm that reporting those as
+ *    ZEBRA_OK is intended.
+ *  - the callee returned something other than ZEBRA_OK: that value is
+ *    returned to the caller.
+ *  - `sysno` is non-NULL: the loop stops after this first iteration.
+ */
+ ZEBRA_RES res = ZEBRA_OK;
+ while (1)
+ {
+ int more = 0;
+ res = zebra_extract_record_stream(zh, stream,
+ delete_flag,
+ test_mode,
+ recordType,
+ sysno,
+ match_criteria,
+ fname,
+ force_update,
+ allow_update,
+ recType, recTypeClientData, &more);
+ if (!more)
+ {
+ res = ZEBRA_OK;
+ break;
+ }
+ if (res != ZEBRA_OK)
+ break;
+ if (sysno)
+ break;
+ }
+ return res;
+}
+
ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh,
struct ZebraRecStream *stream,
int delete_flag,
int test_mode,
const char *recordType,
- SYSNO *sysno,
+ zint *sysno,
const char *match_criteria,
const char *fname,
int force_update,
int allow_update,
RecType recType,
- void *recTypeClientData)
+ void *recTypeClientData,
+ int *more)
{
- SYSNO sysno0 = 0;
+ zint sysno0 = 0;
RecordAttr *recordAttr;
struct recExtractCtrl extractCtrl;
int r;
const char *matchStr = 0;
Record rec;
- off_t start_offset = 0;
+ off_t start_offset = 0, end_offset = 0;
const char *pr_fname = fname; /* filename to print .. */
- int show_progress = zh->records_processed < zh->m_file_verbose_limit ? 1:0;
+ int show_progress = zh->records_processed + zh->records_skipped
+ < zh->m_file_verbose_limit ? 1:0;
zebra_init_log_level();
extractCtrl.match_criteria[0] = '\0';
extractCtrl.staticrank = 0;
-
init_extractCtrl(zh, &extractCtrl);
extract_set_store_data_prepare(&extractCtrl);
r = (*recType->extract)(recTypeClientData, &extractCtrl);
-
- if (r == RECCTRL_EXTRACT_EOF)
- return ZEBRA_FAIL;
- else if (r == RECCTRL_EXTRACT_ERROR_GENERIC)
+
+ switch (r)
{
+ case RECCTRL_EXTRACT_EOF:
+ return ZEBRA_FAIL;
+ case RECCTRL_EXTRACT_ERROR_GENERIC:
/* error occured during extraction ... */
yaz_log (YLOG_WARN, "extract error: generic");
return ZEBRA_FAIL;
- }
- else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER)
- {
+ case RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER:
/* error occured during extraction ... */
yaz_log (YLOG_WARN, "extract error: no such filter");
return ZEBRA_FAIL;
+ case RECCTRL_EXTRACT_SKIP:
+ if (show_progress)
+ yaz_log (YLOG_LOG, "skip %s %s " ZINT_FORMAT,
+ recordType, pr_fname, (zint) start_offset);
+ *more = 1;
+
+ end_offset = stream->endf(stream, 0);
+ if (end_offset)
+ stream->seekf(stream, end_offset);
+
+ return ZEBRA_OK;
+ case RECCTRL_EXTRACT_OK:
+ break;
+ default:
+ yaz_log (YLOG_WARN, "extract error: unknown error: %d", r);
+ return ZEBRA_FAIL;
}
-
+ end_offset = stream->endf(stream, 0);
+ if (end_offset)
+ stream->seekf(stream, end_offset);
+ else
+ end_offset = stream->tellf(stream);
+
all_matches_add(&extractCtrl);
if (extractCtrl.match_criteria[0])
match_criteria = extractCtrl.match_criteria;
}
- if (!sysno) {
+ *more = 1;
+ if (!sysno)
+ {
sysno = &sysno0;
if (match_criteria && *match_criteria) {
matchStr = match_criteria;
} else {
if (zh->m_record_id && *zh->m_record_id) {
- matchStr = fileMatchStr (zh, zh->reg->keys, pr_fname,
- zh->m_record_id);
+ matchStr = get_match_from_spec(zh, zh->reg->keys, pr_fname,
+ zh->m_record_id);
if (!matchStr)
{
- yaz_log (YLOG_WARN, "Bad match criteria (recordID)");
+ yaz_log (YLOG_LOG, "error %s %s " ZINT_FORMAT, recordType,
+ pr_fname, (zint) start_offset);
return ZEBRA_FAIL;
}
}
yaz_log (YLOG_WARN, "cannot delete record above (seems new)");
return ZEBRA_FAIL;
}
+ else if (!force_update)
+ {
+ yaz_log (YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
+ pr_fname, (zint) start_offset);
+ yaz_log (YLOG_WARN, "cannot update record above (seems new)");
+ return ZEBRA_FAIL;
+ }
if (show_progress)
yaz_log (YLOG_LOG, "add %s %s " ZINT_FORMAT, recordType, pr_fname,
(zint) start_offset);
*sysno = rec->sysno;
recordAttr = rec_init_attr (zh->reg->zei, rec);
+ if (extractCtrl.staticrank < 0)
+ {
+ yaz_log(YLOG_WARN, "Negative staticrank for record. Set to 0");
+ extractCtrl.staticrank = 0;
+ }
recordAttr->staticrank = extractCtrl.staticrank;
if (matchStr)
sizeof(*sysno), sysno);
}
-
- extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
- extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
- recordAttr->staticrank);
+ extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys);
+ extract_flush_record_keys(zh, *sysno, 1, zh->reg->keys,
+ recordAttr->staticrank);
zh->records_inserted++;
}
else
recordAttr = rec_init_attr (zh->reg->zei, rec);
+ /* decrease total size */
+ zebraExplain_recordBytesIncrement (zh->reg->zei,
+ - recordAttr->recordSize);
+
zebra_rec_keys_set_buf(delkeys,
rec->info[recInfo_delKeys],
rec->size[recInfo_delKeys],
rec->size[recInfo_sortKeys],
0);
- extract_flushSortKeys (zh, *sysno, 0, sortKeys);
- extract_flushRecordKeys (zh, *sysno, 0, delkeys,
- recordAttr->staticrank);
+ extract_flush_sort_keys(zh, *sysno, 0, sortKeys);
+ extract_flush_record_keys(zh, *sysno, 0, delkeys,
+ recordAttr->staticrank);
if (delete_flag)
{
/* record going to be deleted */
else
{
if (show_progress)
- yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
- pr_fname, (zint) ZINT_FORMAT);
+ yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
+ pr_fname, (zint) start_offset);
recordAttr->staticrank = extractCtrl.staticrank;
- extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
- extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
- recordAttr->staticrank);
+ extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys);
+ extract_flush_record_keys(zh, *sysno, 1, zh->reg->keys,
+ recordAttr->staticrank);
zh->records_updated++;
}
zebra_rec_keys_close(delkeys);
&rec->info[recInfo_sortKeys],
&rec->size[recInfo_sortKeys]);
- /* save file size of original record */
- zebraExplain_recordBytesIncrement (zh->reg->zei,
- - recordAttr->recordSize);
if (stream)
{
- off_t end_offset = stream->endf(stream, 0);
-
- if (!end_offset)
- end_offset = stream->tellf(stream);
- else
- stream->seekf(stream, end_offset);
-
recordAttr->recordSize = end_offset - start_offset;
zebraExplain_recordBytesIncrement(zh->reg->zei,
recordAttr->recordSize);
zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys],
rec->size[recInfo_delKeys],
0);
- extract_flushRecordKeys (zh, rec->sysno, 0, delkeys, 0);
+ extract_flush_record_keys(zh, rec->sysno, 0, delkeys, 0);
zebra_rec_keys_close(delkeys);
zebra_rec_keys_set_buf(sortkeys, rec->info[recInfo_sortKeys],
rec->size[recInfo_sortKeys],
0);
- extract_flushSortKeys (zh, rec->sysno, 0, sortkeys);
+ extract_flush_sort_keys(zh, rec->sysno, 0, sortkeys);
zebra_rec_keys_close(sortkeys);
}
- extract_flushRecordKeys (zh, rec->sysno, 1, zh->reg->keys, 0);
- extract_flushSortKeys (zh, rec->sysno, 1, zh->reg->sortKeys);
-
+ extract_flush_record_keys(zh, rec->sysno, 1, zh->reg->keys, 0);
+ extract_flush_sort_keys(zh, rec->sysno, 1, zh->reg->sortKeys);
+
xfree (rec->info[recInfo_delKeys]);
zebra_rec_keys_get_buf(zh->reg->keys,
&rec->info[recInfo_delKeys],
}
}
-void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
- int cmd,
- zebra_rec_keys_t reckeys,
- zint staticrank)
+void extract_flush_record_keys(ZebraHandle zh, zint sysno, int cmd,
+ zebra_rec_keys_t reckeys,
+ zint staticrank)
{
ZebraExplainInfo zei = zh->reg->zei;
extract_rec_keys_log(zh, cmd, reckeys, log_level_details);
}
- if (!zh->reg->key_buf)
+ if (!zh->reg->key_block)
{
- int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8"));
- if (mem <= 0)
- {
- yaz_log(YLOG_WARN, "Invalid memory setting, using default 8 MB");
- mem= 1024*1024*8;
- }
- /* FIXME: That "8" should be in a default settings include */
- /* not hard-coded here! -H */
- zh->reg->key_buf = (char**) xmalloc (mem);
- zh->reg->ptr_top = mem/sizeof(char*);
- zh->reg->ptr_i = 0;
- zh->reg->key_buf_used = 0;
- zh->reg->key_file_no = 0;
+ int mem = 1024*1024 * atoi( res_get_def( zh->res, "memmax", "8"));
+ const char *key_tmp_dir = res_get_def (zh->res, "keyTmpDir", ".");
+ int use_threads = atoi(res_get_def (zh->res, "threads", "1"));
+ zh->reg->key_block = key_block_create(mem, key_tmp_dir, use_threads);
}
zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1);
struct it_key key_in;
while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
{
- int ch = 0;
- int i, j = 0;
- struct it_key key_out;
-
- assert(key_in.len >= 2);
- assert(key_in.len <= IT_KEY_LEVEL_MAX);
-
- /* check for buffer overflow */
- if (zh->reg->key_buf_used + 1024 >
- (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*))
- extract_flushWriteKeys (zh, 0);
-
- ++(zh->reg->ptr_i);
- assert(zh->reg->ptr_i > 0);
- (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] =
- (char*)zh->reg->key_buf + zh->reg->key_buf_used;
-
- /* key_in.mem[0] ord/ch */
- /* key_in.mem[1] filter specified record ID */
-
- /* encode the ordinal value (field/use/attribute) .. */
- ch = CAST_ZINT_TO_INT(key_in.mem[0]);
- zh->reg->key_buf_used +=
- key_SU_encode(ch, (char*)zh->reg->key_buf +
- zh->reg->key_buf_used);
-
- /* copy the 0-terminated stuff from str to output */
- memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, str, slen);
- zh->reg->key_buf_used += slen;
- ((char*)zh->reg->key_buf)[(zh->reg->key_buf_used)++] = '\0';
-
- /* the delete/insert indicator */
- ((char*)zh->reg->key_buf)[(zh->reg->key_buf_used)++] = cmd;
-
- if (zh->m_staticrank) /* rank config enabled ? */
- {
- if (staticrank < 0)
- {
- yaz_log(YLOG_WARN, "staticrank = %ld. Setting to 0",
- (long) staticrank);
- staticrank = 0;
- }
- key_out.mem[j++] = staticrank;
- }
-
- if (key_in.mem[1]) /* filter specified record ID */
- key_out.mem[j++] = key_in.mem[1];
- else
- key_out.mem[j++] = sysno;
- for (i = 2; i < key_in.len; i++)
- key_out.mem[j++] = key_in.mem[i];
- key_out.len = j;
-
- memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used,
- &key_out, sizeof(key_out));
- (zh->reg->key_buf_used) += sizeof(key_out);
+ key_block_write(zh->reg->key_block, sysno,
+ &key_in, cmd, str, slen,
+ staticrank, zh->m_staticrank);
}
}
}
-void extract_flushWriteKeys (ZebraHandle zh, int final)
- /* optimizing: if final=1, and no files written yet */
- /* push the keys directly to merge, sidestepping the */
- /* temp file altogether. Speeds small updates */
-{
- FILE *outf;
- char out_fname[200];
- char *prevcp, *cp;
- struct encode_info encode_info;
- int ptr_i = zh->reg->ptr_i;
- int temp_policy;
- if (!zh->reg->key_buf || ptr_i <= 0)
- {
- yaz_log(log_level_extract, " nothing to flush section=%d buf=%p i=%d",
- zh->reg->key_file_no, zh->reg->key_buf, ptr_i);
- return;
- }
-
- (zh->reg->key_file_no)++;
- yaz_log (YLOG_LOG, "sorting section %d", (zh->reg->key_file_no));
- yaz_log(log_level_extract, " sort_buff at %p n=%d",
- zh->reg->key_buf + zh->reg->ptr_top - ptr_i,ptr_i);
-
-
-#if USE_SHELLSORT
- shellsort(zh->reg->key_buf + zh->reg->ptr_top - ptr_i, ptr_i,
- sizeof(char*), key_qsort_compare);
-#else
- qsort(zh->reg->key_buf + zh->reg->ptr_top - ptr_i, ptr_i,
- sizeof(char*), key_qsort_compare);
-#endif
- /* zebra.cfg: tempfiles:
- Y: always use temp files (old way)
- A: use temp files, if more than one (auto)
- = if this is both the last and the first
- N: never bother with temp files (new) */
-
- temp_policy=toupper(res_get_def(zh->res,"tempfiles","auto")[0]);
- if (temp_policy != 'Y' && temp_policy != 'N' && temp_policy != 'A') {
- yaz_log (YLOG_WARN, "Illegal tempfiles setting '%c'. using 'Auto' ",
- temp_policy);
- temp_policy='A';
- }
-
- if ( ( temp_policy =='N' ) || /* always from memory */
- ( ( temp_policy =='A' ) && /* automatic */
- (zh->reg->key_file_no == 1) && /* this is first time */
- (final) ) ) /* and last (=only) time */
- { /* go directly from memory */
- zh->reg->key_file_no =0; /* signal not to read files */
- zebra_index_merge(zh);
- zh->reg->ptr_i = 0;
- zh->reg->key_buf_used = 0;
- return;
- }
-
- /* Not doing directly from memory, write into a temp file */
- extract_get_fname_tmp (zh, out_fname, zh->reg->key_file_no);
-
- if (!(outf = fopen (out_fname, "wb")))
- {
- yaz_log (YLOG_FATAL|YLOG_ERRNO, "fopen %s", out_fname);
- zebra_exit("extract_flushWriteKeys");
- }
- yaz_log (YLOG_LOG, "writing section %d", zh->reg->key_file_no);
- prevcp = cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i];
-
- encode_key_init (&encode_info);
- encode_key_write (cp, &encode_info, outf);
-
- while (--ptr_i > 0)
- {
- cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i];
- if (strcmp (cp, prevcp))
- {
- encode_key_flush ( &encode_info, outf);
- encode_key_init (&encode_info);
- encode_key_write (cp, &encode_info, outf);
- prevcp = cp;
- }
- else
- encode_key_write (cp + strlen(cp), &encode_info, outf);
- }
- encode_key_flush ( &encode_info, outf);
- if (fclose (outf))
- {
- yaz_log (YLOG_FATAL|YLOG_ERRNO, "fclose %s", out_fname);
- zebra_exit("extract_flushWriteKeys");
- }
- yaz_log (YLOG_LOG, "finished section %d", zh->reg->key_file_no);
- zh->reg->ptr_i = 0;
- zh->reg->key_buf_used = 0;
-}
ZEBRA_RES zebra_rec_keys_to_snippets(ZebraHandle zh,
zebra_rec_keys_t reckeys,
struct it_key key;
while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
{
- char dst_buf[IT_MAX_WORD];
- char *dst_term = dst_buf;
+ char *dst_term = 0;
int ord;
zint seqno;
int index_type;
zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
}
+static void extract_add_staticrank_string(RecWord *p,
+ const char *str, int length)
+{ /* Converts the first `length` bytes of `str` to a number and stores it
+ * in p->extractCtrl->staticrank. The bytes are copied into the scratch
+ * buffer valz and NUL-terminated before being handed to atozint();
+ * input longer than sizeof(valz)-1 bytes is silently truncated. */
+ char valz[40];
+ struct recExtractCtrl *ctrl = p->extractCtrl;
+ /* NOTE(review): `length` is an int while sizeof yields size_t, so on
+ * usual platforms the comparison below is done in unsigned arithmetic.
+ * A negative length would therefore be clamped to sizeof(valz)-1
+ * instead of being rejected; presumably callers never pass one --
+ * confirm. */
+ if (length > sizeof(valz)-1)
+ length = sizeof(valz)-1;
+
+ memcpy(valz, str, length);
+ valz[length] = '\0';
+ ctrl->staticrank = atozint(valz);
+}
+
static void extract_add_string(RecWord *p, const char *string, int length)
{
ZebraHandle zh = p->extractCtrl->handle;
if (!p->index_name)
return;
- if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type))
- extract_add_sort_string(p, string, length);
- else
+ if (zebra_maps_is_index(zh->reg->zebra_maps, p->index_type))
{
extract_add_index_string(p, zinfo_index_category_index,
string, length);
&word, zinfo_index_category_alwaysmatches, "", 0);
}
}
+ else if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type))
+ {
+ extract_add_sort_string(p, string, length);
+ }
+ else if (zebra_maps_is_staticrank(zh->reg->zebra_maps, p->index_type))
+ {
+ extract_add_staticrank_string(p, string, length);
+ }
}
static void extract_add_incomplete_field(RecWord *p)
ZebraHandle zh = p->extractCtrl->handle;
const char *b = p->term_buf;
int remain = p->term_len;
+ int first = 1;
const char **map = 0;
if (remain > 0)
map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0);
- if (map)
- {
- if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type))
- {
- /* first in field marker */
- extract_add_string(p, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN);
- p->seqno++;
- }
- }
while (map)
{
char buf[IT_MAX_WORD+1];
}
if (!i)
return;
+
+ if (first)
+ {
+ first = 0;
+ if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type))
+ {
+ /* first in field marker */
+ extract_add_string(p, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN);
+ p->seqno++;
+ }
+ }
extract_add_string (p, buf, i);
p->seqno++;
}
p->setStoreData = extract_set_store_data_cb;
}
-static void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid)
+static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid) /*
+ * Callback that casts p->handle back to a ZebraHandle and passes `oid`,
+ * together with that handle's explain info (zh->reg->zei), to
+ * zebraExplain_addSchema(). */
{
ZebraHandle zh = (ZebraHandle) p->handle;
zebraExplain_addSchema (zh->reg->zei, oid);
}
-void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno,
- int cmd, zebra_rec_keys_t reckeys)
+void extract_flush_sort_keys(ZebraHandle zh, zint sysno,
+ int cmd, zebra_rec_keys_t reckeys)
{
+#if 0
+ yaz_log(YLOG_LOG, "extract_flush_sort_keys cmd=%d sysno=" ZINT_FORMAT,
+ cmd, sysno);
+ extract_rec_keys_log(zh, cmd, reckeys, YLOG_LOG);
+#endif
+ /* Applies the sort keys held in `reckeys` to zh->reg->sort_index for
+ * record `sysno`. After zebra_sort_sysno() has been given the record
+ * number, each key read from `reckeys` has its ordinal (key_in.mem[0])
+ * passed to zebra_sort_type(); the key text is then stored with
+ * zebra_sort_add() when cmd == 1, and zebra_sort_delete() is called for
+ * any other cmd. Nothing is done when zebra_rec_keys_rewind() returns 0.
+ * The #if 0 section above is disabled debug logging. */
if (zebra_rec_keys_rewind(reckeys))
{
- SortIdx sortIdx = zh->reg->sortIdx;
+ zebra_sort_index_t si = zh->reg->sort_index;
size_t slen;
const char *str;
struct it_key key_in;
- sortIdx_sysno (sortIdx, sysno);
+ zebra_sort_sysno(si, sysno);
while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
{
int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
- sortIdx_type(sortIdx, ord);
+ zebra_sort_type(si, ord);
if (cmd == 1)
- sortIdx_add(sortIdx, str, slen);
+ zebra_sort_add(si, str, slen);
else
- sortIdx_add(sortIdx, "", 1);
+ zebra_sort_delete(si);
}
}
}
-static void encode_key_init(struct encode_info *i)
-{
- i->encode_handle = iscz1_start();
- i->decode_handle = iscz1_start();
-}
-
-static void encode_key_write (char *k, struct encode_info *i, FILE *outf)
-{
- struct it_key key;
- char *bp = i->buf, *bp0;
- const char *src = (char *) &key;
-
- /* copy term to output buf */
- while ((*bp++ = *k++))
- ;
- /* and copy & align key so we can mangle */
- memcpy (&key, k+1, sizeof(struct it_key)); /* *k is insert/delete */
-
-#if 0
- /* debugging */
- key_logdump_txt(YLOG_LOG, &key, *k ? "i" : "d");
-#endif
- assert(key.mem[0] >= 0);
-
- bp0 = bp++;
- iscz1_encode(i->encode_handle, &bp, &src);
-
- *bp0 = (*k * 128) + bp - bp0 - 1; /* length and insert/delete combined */
- if (fwrite (i->buf, bp - i->buf, 1, outf) != 1)
- {
- yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite");
- zebra_exit("encode_key_write");
- }
-
-#if 0
- /* debugging */
- if (1)
- {
- struct it_key key2;
- const char *src = bp0+1;
- char *dst = (char*) &key2;
- iscz1_decode(i->decode_handle, &dst, &src);
-
- key_logdump_txt(YLOG_LOG, &key2, *k ? "i" : "d");
-
- assert(key2.mem[1]);
- }
-#endif
-}
-
-static void encode_key_flush (struct encode_info *i, FILE *outf)
-{
- iscz1_stop(i->encode_handle);
- iscz1_stop(i->decode_handle);
-}
-
/*
* Local variables:
* c-basic-offset: 4