-/* $Id: extract.c,v 1.239 2006-11-21 14:32:38 adam Exp $
- Copyright (C) 1995-2006
+/* $Id: extract.c,v 1.251 2007-03-13 13:46:11 adam Exp $
+ Copyright (C) 1995-2007
Index Data ApS
This file is part of the Zebra server.
}
}
-static void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
- int cmd, zebra_rec_keys_t reckeys,
- zint staticrank);
-static void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno,
- int cmd, zebra_rec_keys_t skp);
+static void extract_flush_record_keys(ZebraHandle zh, zint sysno,
+ int cmd, zebra_rec_keys_t reckeys,
+ zint staticrank);
+static void extract_flush_sort_keys(ZebraHandle zh, zint sysno,
+ int cmd, zebra_rec_keys_t skp);
static void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid);
static void extract_token_add (RecWord *p);
+static void check_log_limit(ZebraHandle zh)
+{ /* Writes one "Omitting rest" notice to the log when the sum
+   * records_processed + records_skipped is exactly equal to
+   * m_file_verbose_limit. The test is an equality, so no notice is
+   * written for any other value of the sum. This function only logs;
+   * it does not modify either counter.
+   *
+   * NOTE(review): the "%d" conversion assumes m_file_verbose_limit is
+   * an int - its declaration is not visible here, confirm.
+   */
+ if (zh->records_processed + zh->records_skipped == zh->m_file_verbose_limit)
+ {
+ yaz_log(YLOG_LOG, "More than %d file log entries. Omitting rest",
+ zh->m_file_verbose_limit);
+ }
+}
+
static void logRecord (ZebraHandle zh)
{
+ check_log_limit(zh);
++zh->records_processed;
if (!(zh->records_processed % 1000))
{
#define FILE_MATCH_BLANK "\t "
-static char *fileMatchStr (ZebraHandle zh,
- zebra_rec_keys_t reckeys,
- const char *fname, const char *spec)
+static char *get_match_from_spec(ZebraHandle zh,
+ zebra_rec_keys_t reckeys,
+ const char *fname, const char *spec)
{
static char dstBuf[2048]; /* static here ??? */
char *dst = dstBuf;
"", 0);
}
-ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname,
+ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh,
+ struct ZebraRecStream *stream,
+ int delete_flag,
+ int test_mode,
+ const char *recordType,
+ zint *sysno,
+ const char *match_criteria,
+ const char *fname,
+ int force_update,
+ int allow_update,
+ RecType recType,
+ void *recTypeClientData); /* forward declaration, so that code placed ahead of the definition in this file can call it */
+
+
+ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname,
int deleteFlag)
{
ZEBRA_RES r = ZEBRA_OK;
}
if (!zh->m_record_type)
{
- if (zh->records_processed < zh->m_file_verbose_limit)
+ check_log_limit(zh);
+ if (zh->records_processed + zh->records_skipped
+ < zh->m_file_verbose_limit)
yaz_log (YLOG_LOG, "? %s", fname);
+ zh->records_skipped++;
return 0;
}
/* determine match criteria */
streamp = &stream;
zebra_create_stream_fd(streamp, fd, 0);
}
- while(1)
- {
- r = zebra_extract_record_stream(zh, streamp,
- deleteFlag,
- 0, /* tst_mode */
- zh->m_record_type,
- sysno,
- 0, /*match_criteria */
- fname,
- 1, /* force_update */
- 1, /* allow_update */
- recType, recTypeClientData);
- if (r != ZEBRA_OK)
- {
- break;
- }
- if (sysno)
- {
- break;
- }
- }
+ r = zebra_extract_records_stream(zh, streamp,
+ deleteFlag,
+ 0, /* tst_mode */
+ zh->m_record_type,
+ sysno,
+ 0, /*match_criteria */
+ fname,
+ 1, /* force_update */
+ 1, /* allow_update */
+ recType, recTypeClientData);
if (streamp)
stream.destroy(streamp);
zh->m_record_type = original_record_type;
int delete_flag,
int test_mode,
const char *recordType,
- SYSNO *sysno,
+ zint *sysno,
const char *match_criteria,
const char *fname,
int force_update,
return ZEBRA_FAIL;
}
-
-
zebra_create_stream_mem(&stream, buf, buf_size);
- res = zebra_extract_record_stream(zh, &stream,
- delete_flag,
- test_mode,
- recordType,
- sysno,
- match_criteria,
- fname,
- force_update,
- allow_update,
- recType, clientData);
+ res = zebra_extract_records_stream(zh, &stream,
+ delete_flag,
+ test_mode,
+ recordType,
+ sysno,
+ match_criteria,
+ fname,
+ force_update,
+ allow_update,
+ recType, clientData);
stream.destroy(&stream);
return res;
}
+ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh,
+ struct ZebraRecStream *stream,
+ int delete_flag,
+ int test_mode,
+ const char *recordType,
+ zint *sysno,
+ const char *match_criteria,
+ const char *fname,
+ int force_update,
+ int allow_update,
+ RecType recType,
+ void *recTypeClientData)
+{ /* Drives zebra_extract_record_stream() over one stream, calling it
+   * repeatedly with the same arguments until one of three things
+   * happens: a call leaves `more` at 0, a call returns something other
+   * than ZEBRA_OK, or the caller supplied a non-NULL sysno.
+   *
+   * Returns ZEBRA_OK when the loop ends because `more` was 0;
+   * otherwise returns the result of the last call.
+   *
+   * NOTE(review): the `more` test runs before the result test, so a
+   * non-OK result from a call that leaves `more` at 0 is replaced by
+   * ZEBRA_OK. Presumably intended for end-of-stream - confirm that
+   * other failures reported with `more` == 0 should also be returned
+   * as success.
+   */
+ ZEBRA_RES res = ZEBRA_OK;
+ while (1)
+ {
+ int more = 0; /* cleared before every call; the callee receives its address */
+ res = zebra_extract_record_stream(zh, stream,
+ delete_flag,
+ test_mode,
+ recordType,
+ sysno,
+ match_criteria,
+ fname,
+ force_update,
+ allow_update,
+ recType, recTypeClientData, &more);
+ if (!more) /* flag still 0 after the call: stop and report success */
+ {
+ res = ZEBRA_OK;
+ break;
+ }
+ if (res != ZEBRA_OK) /* failure with `more` set: hand it to the caller */
+ break;
+ if (sysno) /* non-NULL sysno: stop after this one successful call */
+ break;
+ }
+ return res;
+}
+
ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh,
struct ZebraRecStream *stream,
int delete_flag,
int test_mode,
const char *recordType,
- SYSNO *sysno,
+ zint *sysno,
const char *match_criteria,
const char *fname,
int force_update,
int allow_update,
RecType recType,
- void *recTypeClientData)
+ void *recTypeClientData,
+ int *more)
{
- SYSNO sysno0 = 0;
+ zint sysno0 = 0;
RecordAttr *recordAttr;
struct recExtractCtrl extractCtrl;
int r;
const char *matchStr = 0;
Record rec;
- off_t start_offset = 0;
+ off_t start_offset = 0, end_offset = 0;
const char *pr_fname = fname; /* filename to print .. */
- int show_progress = zh->records_processed < zh->m_file_verbose_limit ? 1:0;
+ int show_progress = zh->records_processed + zh->records_skipped
+ < zh->m_file_verbose_limit ? 1:0;
zebra_init_log_level();
extractCtrl.match_criteria[0] = '\0';
extractCtrl.staticrank = 0;
-
init_extractCtrl(zh, &extractCtrl);
extract_set_store_data_prepare(&extractCtrl);
r = (*recType->extract)(recTypeClientData, &extractCtrl);
-
- if (r == RECCTRL_EXTRACT_EOF)
- return ZEBRA_FAIL;
- else if (r == RECCTRL_EXTRACT_ERROR_GENERIC)
+
+ switch (r)
{
+ case RECCTRL_EXTRACT_EOF:
+ return ZEBRA_FAIL;
+ case RECCTRL_EXTRACT_ERROR_GENERIC:
/* error occured during extraction ... */
yaz_log (YLOG_WARN, "extract error: generic");
return ZEBRA_FAIL;
- }
- else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER)
- {
+ case RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER:
/* error occured during extraction ... */
yaz_log (YLOG_WARN, "extract error: no such filter");
return ZEBRA_FAIL;
+ case RECCTRL_EXTRACT_SKIP:
+ if (show_progress)
+ yaz_log (YLOG_LOG, "skip %s %s " ZINT_FORMAT,
+ recordType, pr_fname, (zint) start_offset);
+ *more = 1;
+
+ end_offset = stream->endf(stream, 0);
+ if (end_offset)
+ stream->seekf(stream, end_offset);
+
+ return ZEBRA_OK;
+ case RECCTRL_EXTRACT_OK:
+ break;
+ default:
+ yaz_log (YLOG_WARN, "extract error: unknown error: %d", r);
+ return ZEBRA_FAIL;
}
-
+ end_offset = stream->endf(stream, 0);
+ if (end_offset)
+ stream->seekf(stream, end_offset);
+ else
+ end_offset = stream->tellf(stream);
+
all_matches_add(&extractCtrl);
if (extractCtrl.match_criteria[0])
match_criteria = extractCtrl.match_criteria;
}
- if (!sysno) {
+ *more = 1;
+ if (!sysno)
+ {
sysno = &sysno0;
if (match_criteria && *match_criteria) {
matchStr = match_criteria;
} else {
if (zh->m_record_id && *zh->m_record_id) {
- matchStr = fileMatchStr (zh, zh->reg->keys, pr_fname,
- zh->m_record_id);
+ matchStr = get_match_from_spec(zh, zh->reg->keys, pr_fname,
+ zh->m_record_id);
if (!matchStr)
{
- yaz_log (YLOG_WARN, "Bad match criteria (recordID)");
+ yaz_log (YLOG_LOG, "error %s %s " ZINT_FORMAT, recordType,
+ pr_fname, (zint) start_offset);
return ZEBRA_FAIL;
}
}
yaz_log (YLOG_WARN, "cannot delete record above (seems new)");
return ZEBRA_FAIL;
}
+ else if (!force_update)
+ {
+ yaz_log (YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
+ pr_fname, (zint) start_offset);
+ yaz_log (YLOG_WARN, "cannot update record above (seems new)");
+ return ZEBRA_FAIL;
+ }
if (show_progress)
yaz_log (YLOG_LOG, "add %s %s " ZINT_FORMAT, recordType, pr_fname,
(zint) start_offset);
*sysno = rec->sysno;
recordAttr = rec_init_attr (zh->reg->zei, rec);
+ if (extractCtrl.staticrank < 0)
+ {
+ yaz_log(YLOG_WARN, "Negative staticrank for record. Set to 0");
+ extractCtrl.staticrank = 0;
+ }
recordAttr->staticrank = extractCtrl.staticrank;
if (matchStr)
sizeof(*sysno), sysno);
}
-
- extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
- extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
- recordAttr->staticrank);
+ extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys);
+ extract_flush_record_keys(zh, *sysno, 1, zh->reg->keys,
+ recordAttr->staticrank);
zh->records_inserted++;
}
else
recordAttr = rec_init_attr (zh->reg->zei, rec);
+ /* decrease total size */
+ zebraExplain_recordBytesIncrement (zh->reg->zei,
+ - recordAttr->recordSize);
+
zebra_rec_keys_set_buf(delkeys,
rec->info[recInfo_delKeys],
rec->size[recInfo_delKeys],
rec->size[recInfo_sortKeys],
0);
- extract_flushSortKeys (zh, *sysno, 0, sortKeys);
- extract_flushRecordKeys (zh, *sysno, 0, delkeys,
- recordAttr->staticrank);
+ extract_flush_sort_keys(zh, *sysno, 0, sortKeys);
+ extract_flush_record_keys(zh, *sysno, 0, delkeys,
+ recordAttr->staticrank);
if (delete_flag)
{
/* record going to be deleted */
else
{
if (show_progress)
- yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
- pr_fname, (zint) ZINT_FORMAT);
+ yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
+ pr_fname, (zint) start_offset);
recordAttr->staticrank = extractCtrl.staticrank;
- extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
- extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
- recordAttr->staticrank);
+ extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys);
+ extract_flush_record_keys(zh, *sysno, 1, zh->reg->keys,
+ recordAttr->staticrank);
zh->records_updated++;
}
zebra_rec_keys_close(delkeys);
&rec->info[recInfo_sortKeys],
&rec->size[recInfo_sortKeys]);
- /* save file size of original record */
- zebraExplain_recordBytesIncrement (zh->reg->zei,
- - recordAttr->recordSize);
if (stream)
{
- off_t end_offset = stream->endf(stream, 0);
-
- if (!end_offset)
- end_offset = stream->tellf(stream);
- else
- stream->seekf(stream, end_offset);
-
recordAttr->recordSize = end_offset - start_offset;
zebraExplain_recordBytesIncrement(zh->reg->zei,
recordAttr->recordSize);
zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys],
rec->size[recInfo_delKeys],
0);
- extract_flushRecordKeys (zh, rec->sysno, 0, delkeys, 0);
+ extract_flush_record_keys(zh, rec->sysno, 0, delkeys, 0);
zebra_rec_keys_close(delkeys);
zebra_rec_keys_set_buf(sortkeys, rec->info[recInfo_sortKeys],
rec->size[recInfo_sortKeys],
0);
- extract_flushSortKeys (zh, rec->sysno, 0, sortkeys);
+ extract_flush_sort_keys(zh, rec->sysno, 0, sortkeys);
zebra_rec_keys_close(sortkeys);
}
- extract_flushRecordKeys (zh, rec->sysno, 1, zh->reg->keys, 0);
- extract_flushSortKeys (zh, rec->sysno, 1, zh->reg->sortKeys);
-
+ extract_flush_record_keys(zh, rec->sysno, 1, zh->reg->keys, 0);
+ extract_flush_sort_keys(zh, rec->sysno, 1, zh->reg->sortKeys);
+
xfree (rec->info[recInfo_delKeys]);
zebra_rec_keys_get_buf(zh->reg->keys,
&rec->info[recInfo_delKeys],
}
}
-void extract_flushRecordKeys(ZebraHandle zh, SYSNO sysno, int cmd,
- zebra_rec_keys_t reckeys,
- zint staticrank)
+void extract_flush_record_keys(ZebraHandle zh, zint sysno, int cmd,
+ zebra_rec_keys_t reckeys,
+ zint staticrank)
{
ZebraExplainInfo zei = zh->reg->zei;
if (!zh->reg->key_block)
{
- int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8"));
+ int mem = 1024*1024 * atoi( res_get_def( zh->res, "memmax", "8"));
const char *key_tmp_dir = res_get_def (zh->res, "keyTmpDir", ".");
- zh->reg->key_block = key_block_create(mem, key_tmp_dir);
+ int use_threads = atoi(res_get_def (zh->res, "threads", "1"));
+ zh->reg->key_block = key_block_create(mem, key_tmp_dir, use_threads);
}
zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1);
struct it_key key;
while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
{
- char dst_buf[IT_MAX_WORD];
- char *dst_term = dst_buf;
+ char *dst_term = 0;
int ord;
zint seqno;
int index_type;
zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
}
+static void extract_add_staticrank_string(RecWord *p,
+ const char *str, int length)
+{ /* Converts a length-delimited string to a number and stores it as
+   * the static rank in p->extractCtrl->staticrank.
+   *
+   * At most sizeof(valz)-1 (39) bytes of str are copied into a local
+   * buffer, which is then NUL-terminated and passed to atozint().
+   * Input longer than that is truncated before conversion. No
+   * terminator is read from str itself.
+   *
+   * atozint() is defined elsewhere; presumably it parses decimal text
+   * into a zint - confirm its handling of non-numeric input.
+   */
+ char valz[40];
+ struct recExtractCtrl *ctrl = p->extractCtrl;
+
+ if (length > sizeof(valz)-1) /* NOTE(review): int compared with size_t;
+   * a negative length would convert to a large unsigned value and be
+   * clamped to 39 instead of being rejected - confirm callers never
+   * pass one */
+ length = sizeof(valz)-1;
+
+ memcpy(valz, str, length);
+ valz[length] = '\0'; /* terminate so the buffer can be parsed as a C string */
+ ctrl->staticrank = atozint(valz);
+}
+
static void extract_add_string(RecWord *p, const char *string, int length)
{
ZebraHandle zh = p->extractCtrl->handle;
if (!p->index_name)
return;
- if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type))
- extract_add_sort_string(p, string, length);
- else
+ if (zebra_maps_is_index(zh->reg->zebra_maps, p->index_type))
{
extract_add_index_string(p, zinfo_index_category_index,
string, length);
&word, zinfo_index_category_alwaysmatches, "", 0);
}
}
+ else if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type))
+ {
+ extract_add_sort_string(p, string, length);
+ }
+ else if (zebra_maps_is_staticrank(zh->reg->zebra_maps, p->index_type))
+ {
+ extract_add_staticrank_string(p, string, length);
+ }
}
static void extract_add_incomplete_field(RecWord *p)
ZebraHandle zh = p->extractCtrl->handle;
const char *b = p->term_buf;
int remain = p->term_len;
+ int first = 1;
const char **map = 0;
if (remain > 0)
map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0);
- if (map)
- {
- if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type))
- {
- /* first in field marker */
- extract_add_string(p, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN);
- p->seqno++;
- }
- }
while (map)
{
char buf[IT_MAX_WORD+1];
}
if (!i)
return;
+
+ if (first)
+ {
+ first = 0;
+ if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type))
+ {
+ /* first in field marker */
+ extract_add_string(p, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN);
+ p->seqno++;
+ }
+ }
extract_add_string (p, buf, i);
p->seqno++;
}
zebraExplain_addSchema (zh->reg->zei, oid);
}
-void extract_flushSortKeys(ZebraHandle zh, SYSNO sysno,
- int cmd, zebra_rec_keys_t reckeys)
+void extract_flush_sort_keys(ZebraHandle zh, zint sysno,
+ int cmd, zebra_rec_keys_t reckeys)
{
+#if 0
+ yaz_log(YLOG_LOG, "extract_flush_sort_keys cmd=%d sysno=" ZINT_FORMAT,
+ cmd, sysno);
+ extract_rec_keys_log(zh, cmd, reckeys, YLOG_LOG);
+#endif
+
if (zebra_rec_keys_rewind(reckeys))
{
- SortIdx sortIdx = zh->reg->sortIdx;
+ zebra_sort_index_t si = zh->reg->sort_index;
size_t slen;
const char *str;
struct it_key key_in;
- sortIdx_sysno (sortIdx, sysno);
+ zebra_sort_sysno(si, sysno);
while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
{
int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
- sortIdx_type(sortIdx, ord);
+ zebra_sort_type(si, ord);
if (cmd == 1)
- sortIdx_add(sortIdx, str, slen);
+ zebra_sort_add(si, str, slen);
else
- sortIdx_add(sortIdx, "", 1);
+ zebra_sort_delete(si);
}
}
}