X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=f6fab5d8188988cae95fd5748731357cc2357c46;hb=21f90a8618faec6bee8d125c12088b74db8eb8b9;hp=903b7c5f2f4b1cf3eedaf8fcdbd001c8b93650ad;hpb=bd6c6c5d9ee278f702572c33b4ea56827c7ca6cb;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 903b7c5..f6fab5d 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,5 +1,5 @@ -/* $Id: extract.c,v 1.244 2006-12-05 08:14:47 adam Exp $ - Copyright (C) 1995-2006 +/* $Id: extract.c,v 1.252 2007-03-14 11:48:32 adam Exp $ + Copyright (C) 1995-2007 Index Data ApS This file is part of the Zebra server. @@ -59,8 +59,18 @@ static void extract_flush_sort_keys(ZebraHandle zh, zint sysno, static void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid); static void extract_token_add (RecWord *p); +static void check_log_limit(ZebraHandle zh) +{ + if (zh->records_processed + zh->records_skipped == zh->m_file_verbose_limit) + { + yaz_log(YLOG_LOG, "More than %d file log entries. Omitting rest", + zh->m_file_verbose_limit); + } +} + static void logRecord (ZebraHandle zh) { + check_log_limit(zh); ++zh->records_processed; if (!(zh->records_processed % 1000)) { @@ -309,6 +319,18 @@ static void all_matches_add(struct recExtractCtrl *ctrl) "", 0); } +ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh, + struct ZebraRecStream *stream, + enum zebra_recctrl_action_t action, + int test_mode, + const char *recordType, + zint *sysno, + const char *match_criteria, + const char *fname, + RecType recType, + void *recTypeClientData); + + ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, int deleteFlag) { @@ -351,8 +373,11 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, } if (!zh->m_record_type) { - if (zh->records_processed < zh->m_file_verbose_limit) + check_log_limit(zh); + if (zh->records_processed + zh->records_skipped + < zh->m_file_verbose_limit) yaz_log (YLOG_LOG, "? %s", fname); + zh->records_skipped++; return 0; } /* determine match criteria */ @@ -404,26 +429,15 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, streamp = &stream; zebra_create_stream_fd(streamp, fd, 0); } - while(1) - { - int more = 0; - r = zebra_extract_record_stream(zh, streamp, - deleteFlag, - 0, /* tst_mode */ - zh->m_record_type, - sysno, - 0, /*match_criteria */ - fname, - 1, /* force_update */ - 1, /* allow_update */ - recType, recTypeClientData, &more); - if (!more) - break; - if (sysno) - { - break; - } - } + r = zebra_extract_records_stream(zh, streamp, + deleteFlag ? + action_delete : action_update, + 0, /* tst_mode */ + zh->m_record_type, + sysno, + 0, /*match_criteria */ + fname, + recType, recTypeClientData); if (streamp) stream.destroy(streamp); zh->m_record_type = original_record_type; @@ -439,20 +453,17 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, const char *buf, size_t buf_size, - int delete_flag, + enum zebra_recctrl_action_t action, int test_mode, const char *recordType, zint *sysno, const char *match_criteria, - const char *fname, - int force_update, - int allow_update) + const char *fname) { struct ZebraRecStream stream; ZEBRA_RES res; void *clientData; RecType recType = 0; - int more = 0; if (recordType && *recordType) { @@ -483,31 +494,63 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, zebra_create_stream_mem(&stream, buf, buf_size); - res = zebra_extract_record_stream(zh, &stream, - delete_flag, - test_mode, - recordType, - sysno, - match_criteria, - fname, - force_update, - allow_update, - recType, clientData, &more); + res = zebra_extract_records_stream(zh, &stream, + action, + test_mode, + recordType, + sysno, + match_criteria, + fname, + recType, clientData); stream.destroy(&stream); return res; } +ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh, + struct ZebraRecStream *stream, + enum zebra_recctrl_action_t action, + int test_mode, + const char *recordType, + zint *sysno, + const char *match_criteria, + const char *fname, + RecType recType, + void *recTypeClientData) +{ + ZEBRA_RES res = ZEBRA_OK; + while (1) + { + int more = 0; + res = zebra_extract_record_stream(zh, stream, + action, + test_mode, + recordType, + sysno, + match_criteria, + fname, + recType, recTypeClientData, &more); + if (!more) + { + res = ZEBRA_OK; + break; + } + if (res != ZEBRA_OK) + break; + if (sysno) + break; + } + return res; +} + ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, struct ZebraRecStream *stream, - int delete_flag, + enum zebra_recctrl_action_t action, int test_mode, const char *recordType, zint *sysno, const char *match_criteria, const char *fname, - int force_update, - int allow_update, RecType recType, void *recTypeClientData, int *more) @@ -521,7 +564,8 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, Record rec; off_t start_offset = 0, end_offset = 0; const char *pr_fname = fname; /* filename to print .. */ - int show_progress = zh->records_processed < zh->m_file_verbose_limit ? 1:0; + int show_progress = zh->records_processed + zh->records_skipped + < zh->m_file_verbose_limit ? 1:0; zebra_init_log_level(); @@ -562,38 +606,48 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, extract_set_store_data_prepare(&extractCtrl); r = (*recType->extract)(recTypeClientData, &extractCtrl); - - if (r == RECCTRL_EXTRACT_EOF) - return ZEBRA_FAIL; - else if (r == RECCTRL_EXTRACT_ERROR_GENERIC) + + switch (r) { + case RECCTRL_EXTRACT_EOF: + return ZEBRA_FAIL; + case RECCTRL_EXTRACT_ERROR_GENERIC: /* error occured during extraction ... */ yaz_log (YLOG_WARN, "extract error: generic"); return ZEBRA_FAIL; - } - else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER) - { + case RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER: /* error occured during extraction ... */ yaz_log (YLOG_WARN, "extract error: no such filter"); return ZEBRA_FAIL; + case RECCTRL_EXTRACT_SKIP: + if (show_progress) + yaz_log (YLOG_LOG, "skip %s %s " ZINT_FORMAT, + recordType, pr_fname, (zint) start_offset); + *more = 1; + + end_offset = stream->endf(stream, 0); + if (end_offset) + stream->seekf(stream, end_offset); + + return ZEBRA_OK; + case RECCTRL_EXTRACT_OK: + break; + default: + yaz_log (YLOG_WARN, "extract error: unknown error: %d", r); + return ZEBRA_FAIL; } - + end_offset = stream->endf(stream, 0); + if (end_offset) + stream->seekf(stream, end_offset); + else + end_offset = stream->tellf(stream); + all_matches_add(&extractCtrl); if (extractCtrl.match_criteria[0]) match_criteria = extractCtrl.match_criteria; - - - end_offset = stream->endf(stream, 0); - - if (!end_offset) - end_offset = stream->tellf(stream); - else - stream->seekf(stream, end_offset); - } - *more = 1; if (!sysno) { @@ -636,13 +690,20 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, if (! *sysno) { /* new record */ - if (delete_flag) + if (action == action_delete) { yaz_log (YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType, pr_fname, (zint) start_offset); yaz_log (YLOG_WARN, "cannot delete record above (seems new)"); return ZEBRA_FAIL; } + else if (action == action_replace) + { + yaz_log (YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) start_offset); + yaz_log (YLOG_WARN, "cannot update record above (seems new)"); + return ZEBRA_FAIL; + } if (show_progress) yaz_log (YLOG_LOG, "add %s %s " ZINT_FORMAT, recordType, pr_fname, (zint) start_offset); @@ -675,7 +736,7 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, /* record already exists */ zebra_rec_keys_t delkeys = zebra_rec_keys_open(); zebra_rec_keys_t sortKeys = zebra_rec_keys_open(); - if (!allow_update) + if (action == action_insert) { yaz_log (YLOG_LOG, "skipped %s %s " ZINT_FORMAT, recordType, pr_fname, (zint) start_offset); @@ -704,7 +765,7 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, extract_flush_sort_keys(zh, *sysno, 0, sortKeys); extract_flush_record_keys(zh, *sysno, 0, delkeys, recordAttr->staticrank); - if (delete_flag) + if (action == action_delete) { /* record going to be deleted */ if (zebra_rec_keys_empty(delkeys)) @@ -732,7 +793,7 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, return ZEBRA_OK; } else - { + { /* update or special_update */ if (show_progress) yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType, pr_fname, (zint) start_offset); @@ -1064,8 +1125,7 @@ ZEBRA_RES zebra_rec_keys_to_snippets(ZebraHandle zh, struct it_key key; while (zebra_rec_keys_read(reckeys, &str, &slen, &key)) { - char dst_buf[IT_MAX_WORD]; - char *dst_term = dst_buf; + char *dst_term = 0; int ord; zint seqno; int index_type; @@ -1159,6 +1219,20 @@ static void extract_add_sort_string(RecWord *p, const char *str, int length) zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key); } +static void extract_add_staticrank_string(RecWord *p, + const char *str, int length) +{ + char valz[40]; + struct recExtractCtrl *ctrl = p->extractCtrl; + + if (length > sizeof(valz)-1) + length = sizeof(valz)-1; + + memcpy(valz, str, length); + valz[length] = '\0'; + ctrl->staticrank = atozint(valz); +} + static void extract_add_string(RecWord *p, const char *string, int length) { ZebraHandle zh = p->extractCtrl->handle; @@ -1167,9 +1241,7 @@ static void extract_add_string(RecWord *p, const char *string, int length) if (!p->index_name) return; - if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type)) - extract_add_sort_string(p, string, length); - else + if (zebra_maps_is_index(zh->reg->zebra_maps, p->index_type)) { extract_add_index_string(p, zinfo_index_category_index, string, length); @@ -1183,6 +1255,14 @@ static void extract_add_string(RecWord *p, const char *string, int length) &word, zinfo_index_category_alwaysmatches, "", 0); } } + else if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type)) + { + extract_add_sort_string(p, string, length); + } + else if (zebra_maps_is_staticrank(zh->reg->zebra_maps, p->index_type)) + { + extract_add_staticrank_string(p, string, length); + } } static void extract_add_incomplete_field(RecWord *p) @@ -1362,24 +1442,30 @@ static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid) void extract_flush_sort_keys(ZebraHandle zh, zint sysno, int cmd, zebra_rec_keys_t reckeys) { +#if 0 + yaz_log(YLOG_LOG, "extract_flush_sort_keys cmd=%d sysno=" ZINT_FORMAT, + cmd, sysno); + extract_rec_keys_log(zh, cmd, reckeys, YLOG_LOG); +#endif + if (zebra_rec_keys_rewind(reckeys)) { - SortIdx sortIdx = zh->reg->sortIdx; + zebra_sort_index_t si = zh->reg->sort_index; size_t slen; const char *str; struct it_key key_in; - sortIdx_sysno (sortIdx, sysno); + zebra_sort_sysno(si, sysno); while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) { int ord = CAST_ZINT_TO_INT(key_in.mem[0]); - sortIdx_type(sortIdx, ord); + zebra_sort_type(si, ord); if (cmd == 1) - sortIdx_add(sortIdx, str, slen); + zebra_sort_add(si, str, slen); else - sortIdx_add(sortIdx, "", 1); + zebra_sort_delete(si); } } }