X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fextract.c;h=64e39d33e84584fdd29366f372acd1a4eb402bf1;hp=b14053109e6194c38d55eeea3d6988cd15fc36a4;hb=6988ba91e363565638c27a8d5895ad9afc409e75;hpb=418038362451c405454fd4bf8dd2100975a8b764 diff --git a/index/extract.c b/index/extract.c index b140531..64e39d3 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,5 +1,5 @@ -/* $Id: extract.c,v 1.242 2006-11-25 09:15:19 adam Exp $ - Copyright (C) 1995-2006 +/* $Id: extract.c,v 1.259 2007-08-21 11:06:47 adam Exp $ + Copyright (C) 1995-2007 Index Data ApS This file is part of the Zebra server. @@ -40,6 +40,16 @@ static int log_level_extract = 0; static int log_level_details = 0; static int log_level_initialized = 0; +/* 1 if we use eliminitate identical delete/insert keys */ +/* eventually this the 0-case code will be removed */ +#define FLUSH2 1 + +void extract_flush_record_keys2(ZebraHandle zh, zint sysno, + zebra_rec_keys_t ins_keys, + zint ins_rank, + zebra_rec_keys_t del_keys, + zint del_rank); + static void zebra_init_log_level(void) { if (!log_level_initialized) @@ -59,8 +69,18 @@ static void extract_flush_sort_keys(ZebraHandle zh, zint sysno, static void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid); static void extract_token_add (RecWord *p); +static void check_log_limit(ZebraHandle zh) +{ + if (zh->records_processed + zh->records_skipped == zh->m_file_verbose_limit) + { + yaz_log(YLOG_LOG, "More than %d file log entries. Omitting rest", + zh->m_file_verbose_limit); + } +} + static void logRecord (ZebraHandle zh) { + check_log_limit(zh); ++zh->records_processed; if (!(zh->records_processed % 1000)) { @@ -71,6 +91,20 @@ static void logRecord (ZebraHandle zh) } } +static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl) +{ + int i; + for (i = 0; i<256; i++) + { + if (zebra_maps_is_positioned(zh->reg->zebra_maps, i)) + ctrl->seqno[i] = 1; + else + ctrl->seqno[i] = 0; + } + ctrl->flagShowRecords = !zh->m_flag_rw; +} + + static void extract_add_index_string (RecWord *p, zinfo_index_category_t cat, const char *str, int length); @@ -88,6 +122,147 @@ static void extract_init(struct recExtractCtrl *p, RecWord *w) w->segment = 0; } +struct snip_rec_info { + ZebraHandle zh; + zebra_snippets *snippets; +}; + + +static void snippet_add_complete_field(RecWord *p) +{ + +} + +static void snippet_add_incomplete_field(RecWord *p, int ord) +{ + struct snip_rec_info *h = p->extractCtrl->handle; + ZebraHandle zh = h->zh; + const char *b = p->term_buf; + int remain = p->term_len; + int first = 1; + const char **map = 0; + const char *start = b; + + if (remain > 0) + map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0); + + while (map) + { + char buf[IT_MAX_WORD+1]; + const char *last = b; + int i, remain; + + /* Skip spaces */ + while (map && *map && **map == *CHR_SPACE) + { + remain = p->term_len - (b - p->term_buf); + last = b; + if (remain > 0) + map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, + remain, 0); + else + map = 0; + } + if (!map) + break; + if (start != last) + { + zebra_snippets_appendn(h->snippets, p->seqno, 1, ord, + start, last - start); + + } + start = last; + + i = 0; + while (map && *map && **map != *CHR_SPACE) + { + const char *cp = *map; + + while (i < IT_MAX_WORD && *cp) + buf[i++] = *(cp++); + remain = p->term_len - (b - p->term_buf); + last = b; + if (remain > 0) + map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0); + else + map = 0; + } + if (!i) + return; + + if (first) + { + first = 0; + if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type)) + { + /* first in field marker */ + p->seqno++; + } + } + if (start != last) + zebra_snippets_appendn(h->snippets, p->seqno, 0, ord, + start, last - start); + start = last; + p->seqno++; + } + +} + +static void snippet_token_add(RecWord *p) +{ + struct snip_rec_info *h = p->extractCtrl->handle; + ZebraHandle zh = h->zh; + + if (zebra_maps_is_index(zh->reg->zebra_maps, p->index_type)) + { + ZebraExplainInfo zei = zh->reg->zei; + int ch = zebraExplain_lookup_attr_str( + zei, zinfo_index_category_index, p->index_type, p->index_name); + + if (zebra_maps_is_complete (h->zh->reg->zebra_maps, p->index_type)) + snippet_add_complete_field (p); + else + snippet_add_incomplete_field(p, ch); + } +} + +static void snippet_schema_add( + struct recExtractCtrl *p, Odr_oid *oid) +{ + +} + +void extract_snippet(ZebraHandle zh, zebra_snippets *sn, + struct ZebraRecStream *stream, + RecType rt, void *recTypeClientData) +{ + struct recExtractCtrl extractCtrl; + struct snip_rec_info info; + int r; + + extractCtrl.stream = stream; + extractCtrl.first_record = 1; + extractCtrl.init = extract_init; + extractCtrl.tokenAdd = snippet_token_add; + extractCtrl.schemaAdd = snippet_schema_add; + assert(zh->reg); + assert(zh->reg->dh); + + extractCtrl.dh = zh->reg->dh; + + info.zh = zh; + info.snippets = sn; + extractCtrl.handle = &info; + extractCtrl.match_criteria[0] = '\0'; + extractCtrl.staticrank = 0; + extractCtrl.action = action_insert; + + init_extractCtrl(zh, &extractCtrl); + + r = (*rt->extract)(recTypeClientData, &extractCtrl); + +} + static void searchRecordKey(ZebraHandle zh, zebra_rec_keys_t reckeys, const char *index_name, @@ -285,19 +460,6 @@ struct recordLogInfo { struct recordGroup *rGroup; }; -static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl) -{ - int i; - for (i = 0; i<256; i++) - { - if (zebra_maps_is_positioned(zh->reg->zebra_maps, i)) - ctrl->seqno[i] = 1; - else - ctrl->seqno[i] = 0; - } - ctrl->flagShowRecords = !zh->m_flag_rw; -} - static void all_matches_add(struct recExtractCtrl *ctrl) { RecWord word; @@ -309,6 +471,18 @@ static void all_matches_add(struct recExtractCtrl *ctrl) "", 0); } +ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh, + struct ZebraRecStream *stream, + enum zebra_recctrl_action_t action, + int test_mode, + const char *recordType, + zint *sysno, + const char *match_criteria, + const char *fname, + RecType recType, + void *recTypeClientData); + + ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, int deleteFlag) { @@ -351,8 +525,11 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, } if (!zh->m_record_type) { - if (zh->records_processed < zh->m_file_verbose_limit) + check_log_limit(zh); + if (zh->records_processed + zh->records_skipped + < zh->m_file_verbose_limit) yaz_log (YLOG_LOG, "? %s", fname); + zh->records_skipped++; return 0; } /* determine match criteria */ @@ -404,26 +581,15 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, streamp = &stream; zebra_create_stream_fd(streamp, fd, 0); } - while(1) - { - int more = 0; - r = zebra_extract_record_stream(zh, streamp, - deleteFlag, - 0, /* tst_mode */ - zh->m_record_type, - sysno, - 0, /*match_criteria */ - fname, - 1, /* force_update */ - 1, /* allow_update */ - recType, recTypeClientData, &more); - if (!more) - break; - if (sysno) - { - break; - } - } + r = zebra_extract_records_stream(zh, streamp, + deleteFlag ? + action_delete : action_update, + 0, /* tst_mode */ + zh->m_record_type, + sysno, + 0, /*match_criteria */ + fname, + recType, recTypeClientData); if (streamp) stream.destroy(streamp); zh->m_record_type = original_record_type; @@ -439,20 +605,17 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, const char *buf, size_t buf_size, - int delete_flag, + enum zebra_recctrl_action_t action, int test_mode, const char *recordType, zint *sysno, const char *match_criteria, - const char *fname, - int force_update, - int allow_update) + const char *fname) { struct ZebraRecStream stream; ZEBRA_RES res; void *clientData; RecType recType = 0; - int more = 0; if (recordType && *recordType) { @@ -483,31 +646,77 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, zebra_create_stream_mem(&stream, buf, buf_size); - res = zebra_extract_record_stream(zh, &stream, - delete_flag, - test_mode, - recordType, - sysno, - match_criteria, - fname, - force_update, - allow_update, - recType, clientData, &more); + res = zebra_extract_records_stream(zh, &stream, + action, + test_mode, + recordType, + sysno, + match_criteria, + fname, + recType, clientData); stream.destroy(&stream); return res; } +ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh, + struct ZebraRecStream *stream, + enum zebra_recctrl_action_t action, + int test_mode, + const char *recordType, + zint *sysno, + const char *match_criteria, + const char *fname, + RecType recType, + void *recTypeClientData) +{ + ZEBRA_RES res = ZEBRA_OK; + while (1) + { + int more = 0; + res = zebra_extract_record_stream(zh, stream, + action, + test_mode, + recordType, + sysno, + match_criteria, + fname, + recType, recTypeClientData, &more); + if (!more) + { + res = ZEBRA_OK; + break; + } + if (res != ZEBRA_OK) + break; + if (sysno) + break; + } + return res; +} + + +static WRBUF wrbuf_hex_str(const char *cstr) +{ + size_t i; + WRBUF w = wrbuf_alloc(); + for (i = 0; cstr[i]; i++) + { + if (cstr[i] < ' ' || cstr[i] > 126) + wrbuf_printf(w, "\\%02X", cstr[i] & 0xff); + else + wrbuf_putc(w, cstr[i]); + } + return w; +} ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, struct ZebraRecStream *stream, - int delete_flag, + enum zebra_recctrl_action_t action, int test_mode, const char *recordType, zint *sysno, const char *match_criteria, const char *fname, - int force_update, - int allow_update, RecType recType, void *recTypeClientData, int *more) @@ -521,7 +730,8 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, Record rec; off_t start_offset = 0, end_offset = 0; const char *pr_fname = fname; /* filename to print .. */ - int show_progress = zh->records_processed < zh->m_file_verbose_limit ? 1:0; + int show_progress = zh->records_processed + zh->records_skipped + < zh->m_file_verbose_limit ? 1:0; zebra_init_log_level(); @@ -556,44 +766,60 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, extractCtrl.handle = zh; extractCtrl.match_criteria[0] = '\0'; extractCtrl.staticrank = 0; + extractCtrl.action = action; init_extractCtrl(zh, &extractCtrl); - + extract_set_store_data_prepare(&extractCtrl); r = (*recType->extract)(recTypeClientData, &extractCtrl); + + if (action == action_update) + { + action = extractCtrl.action; + } - if (r == RECCTRL_EXTRACT_EOF) - return ZEBRA_FAIL; - else if (r == RECCTRL_EXTRACT_ERROR_GENERIC) + switch (r) { + case RECCTRL_EXTRACT_EOF: + return ZEBRA_FAIL; + case RECCTRL_EXTRACT_ERROR_GENERIC: /* error occured during extraction ... */ yaz_log (YLOG_WARN, "extract error: generic"); return ZEBRA_FAIL; - } - else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER) - { + case RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER: /* error occured during extraction ... */ yaz_log (YLOG_WARN, "extract error: no such filter"); return ZEBRA_FAIL; + case RECCTRL_EXTRACT_SKIP: + if (show_progress) + yaz_log (YLOG_LOG, "skip %s %s " ZINT_FORMAT, + recordType, pr_fname, (zint) start_offset); + *more = 1; + + end_offset = stream->endf(stream, 0); + if (end_offset) + stream->seekf(stream, end_offset); + + return ZEBRA_OK; + case RECCTRL_EXTRACT_OK: + break; + default: + yaz_log (YLOG_WARN, "extract error: unknown error: %d", r); + return ZEBRA_FAIL; } - + end_offset = stream->endf(stream, 0); + if (end_offset) + stream->seekf(stream, end_offset); + else + end_offset = stream->tellf(stream); + all_matches_add(&extractCtrl); if (extractCtrl.match_criteria[0]) match_criteria = extractCtrl.match_criteria; - - - end_offset = stream->endf(stream, 0); - - if (!end_offset) - end_offset = stream->tellf(stream); - else - stream->seekf(stream, end_offset); - } - *more = 1; if (!sysno) { @@ -618,12 +844,20 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, int db_ord = zebraExplain_get_database_ord(zh->reg->zei); char *rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord, matchStr); + + + if (log_level_extract) + { + WRBUF w = wrbuf_hex_str(matchStr); + yaz_log(log_level_extract, "matchStr: %s", wrbuf_cstr(w)); + wrbuf_destroy(w); + } if (rinfo) { assert(*rinfo == sizeof(*sysno)); memcpy (sysno, rinfo+1, sizeof(*sysno)); } - } + } } if (zebra_rec_keys_empty(zh->reg->keys)) { @@ -636,13 +870,20 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, if (! *sysno) { /* new record */ - if (delete_flag) + if (action == action_delete) { yaz_log (YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType, pr_fname, (zint) start_offset); yaz_log (YLOG_WARN, "cannot delete record above (seems new)"); return ZEBRA_FAIL; } + else if (action == action_replace) + { + yaz_log (YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) start_offset); + yaz_log (YLOG_WARN, "cannot update record above (seems new)"); + return ZEBRA_FAIL; + } if (show_progress) yaz_log (YLOG_LOG, "add %s %s " ZINT_FORMAT, recordType, pr_fname, (zint) start_offset); @@ -651,7 +892,11 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, *sysno = rec->sysno; recordAttr = rec_init_attr (zh->reg->zei, rec); - recordAttr->staticrank = extractCtrl.staticrank; + if (extractCtrl.staticrank < 0) + { + yaz_log(YLOG_WARN, "Negative staticrank for record. Set to 0"); + extractCtrl.staticrank = 0; + } if (matchStr) { @@ -661,8 +906,15 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, } extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys); +#if FLUSH2 + extract_flush_record_keys2(zh, *sysno, + zh->reg->keys, extractCtrl.staticrank, + 0, recordAttr->staticrank); +#else extract_flush_record_keys(zh, *sysno, 1, zh->reg->keys, - recordAttr->staticrank); + extractCtrl.staticrank); +#endif + recordAttr->staticrank = extractCtrl.staticrank; zh->records_inserted++; } else @@ -670,7 +922,7 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, /* record already exists */ zebra_rec_keys_t delkeys = zebra_rec_keys_open(); zebra_rec_keys_t sortKeys = zebra_rec_keys_open(); - if (!allow_update) + if (action == action_insert) { yaz_log (YLOG_LOG, "skipped %s %s " ZINT_FORMAT, recordType, pr_fname, (zint) start_offset); @@ -697,11 +949,17 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, 0); extract_flush_sort_keys(zh, *sysno, 0, sortKeys); +#if !FLUSH2 extract_flush_record_keys(zh, *sysno, 0, delkeys, recordAttr->staticrank); - if (delete_flag) +#endif + if (action == action_delete) { /* record going to be deleted */ +#if FLUSH2 + extract_flush_record_keys2(zh, *sysno, 0, recordAttr->staticrank, + delkeys, recordAttr->staticrank); +#endif if (zebra_rec_keys_empty(delkeys)) { yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType, @@ -722,19 +980,28 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, } rec_del (zh->reg->records, &rec); } + zebra_rec_keys_close(delkeys); + zebra_rec_keys_close(sortKeys); rec_free(&rec); logRecord(zh); return ZEBRA_OK; } else - { + { /* update or special_update */ if (show_progress) yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType, pr_fname, (zint) start_offset); - recordAttr->staticrank = extractCtrl.staticrank; extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys); - extract_flush_record_keys(zh, *sysno, 1, zh->reg->keys, - recordAttr->staticrank); + +#if FLUSH2 + extract_flush_record_keys2(zh, *sysno, + zh->reg->keys, extractCtrl.staticrank, + delkeys, recordAttr->staticrank); +#else + extract_flush_record_keys(zh, *sysno, 1, + zh->reg->keys, extractCtrl.staticrank); +#endif + recordAttr->staticrank = extractCtrl.staticrank; zh->records_updated++; } zebra_rec_keys_close(delkeys); @@ -850,6 +1117,8 @@ ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n) extractCtrl.flagShowRecords = 0; extractCtrl.match_criteria[0] = '\0'; extractCtrl.staticrank = 0; + extractCtrl.action = action_update; + extractCtrl.handle = handle; extractCtrl.first_record = 1; @@ -867,7 +1136,13 @@ ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n) zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], 0); +#if FLUSH2 + extract_flush_record_keys2(zh, rec->sysno, + zh->reg->keys, 0, delkeys, 0); +#else extract_flush_record_keys(zh, rec->sysno, 0, delkeys, 0); + extract_flush_record_keys(zh, rec->sysno, 1, zh->reg->keys, 0); +#endif zebra_rec_keys_close(delkeys); zebra_rec_keys_set_buf(sortkeys, rec->info[recInfo_sortKeys], @@ -877,7 +1152,14 @@ ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n) extract_flush_sort_keys(zh, rec->sysno, 0, sortkeys); zebra_rec_keys_close(sortkeys); } - extract_flush_record_keys(zh, rec->sysno, 1, zh->reg->keys, 0); + else + { +#if FLUSH2 + extract_flush_record_keys2(zh, rec->sysno, zh->reg->keys, 0, 0, 0); +#else + extract_flush_record_keys(zh, rec->sysno, 1, zh->reg->keys, 0); +#endif + } extract_flush_sort_keys(zh, rec->sysno, 1, zh->reg->sortKeys); xfree (rec->info[recInfo_delKeys]); @@ -1008,6 +1290,79 @@ void extract_rec_keys_adjust(ZebraHandle zh, int is_insert, } } +void extract_flush_record_keys2(ZebraHandle zh, zint sysno, + zebra_rec_keys_t ins_keys, zint ins_rank, + zebra_rec_keys_t del_keys, zint del_rank) +{ + ZebraExplainInfo zei = zh->reg->zei; + int normal = 0; + int optimized = 0; + + if (!zh->reg->key_block) + { + int mem = 1024*1024 * atoi( res_get_def( zh->res, "memmax", "8")); + const char *key_tmp_dir = res_get_def (zh->res, "keyTmpDir", "."); + int use_threads = atoi(res_get_def (zh->res, "threads", "1")); + zh->reg->key_block = key_block_create(mem, key_tmp_dir, use_threads); + } + + if (ins_keys) + { + extract_rec_keys_adjust(zh, 1, ins_keys); + if (!del_keys) + zebraExplain_recordCountIncrement (zei, 1); + zebra_rec_keys_rewind(ins_keys); + } + if (del_keys) + { + extract_rec_keys_adjust(zh, 0, del_keys); + if (!ins_keys) + zebraExplain_recordCountIncrement (zei, -1); + zebra_rec_keys_rewind(del_keys); + } + + while (1) + { + size_t del_slen; + const char *del_str; + struct it_key del_key_in; + int del = 0; + + size_t ins_slen; + const char *ins_str; + struct it_key ins_key_in; + int ins = 0; + + if (del_keys) + del = zebra_rec_keys_read(del_keys, &del_str, &del_slen, + &del_key_in); + if (ins_keys) + ins = zebra_rec_keys_read(ins_keys, &ins_str, &ins_slen, + &ins_key_in); + + if (del && ins && ins_rank == del_rank + && !key_compare(&del_key_in, &ins_key_in) + && ins_slen == del_slen && !memcmp(del_str, ins_str, del_slen)) + { + optimized++; + continue; + } + if (!del && !ins) + break; + + normal++; + if (del) + key_block_write(zh->reg->key_block, sysno, + &del_key_in, 0, del_str, del_slen, + del_rank, zh->m_staticrank); + if (ins) + key_block_write(zh->reg->key_block, sysno, + &ins_key_in, 1, ins_str, ins_slen, + ins_rank, zh->m_staticrank); + } + yaz_log(log_level_extract, "normal=%d optimized=%d", normal, optimized); +} + void extract_flush_record_keys(ZebraHandle zh, zint sysno, int cmd, zebra_rec_keys_t reckeys, zint staticrank) @@ -1032,6 +1387,10 @@ void extract_flush_record_keys(ZebraHandle zh, zint sysno, int cmd, } zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1); +#if 0 + yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " cmd=%d", sysno, cmd); + print_rec_keys(zh, reckeys); +#endif if (zebra_rec_keys_rewind(reckeys)) { size_t slen; @@ -1046,7 +1405,6 @@ void extract_flush_record_keys(ZebraHandle zh, zint sysno, int cmd, } } - ZEBRA_RES zebra_rec_keys_to_snippets(ZebraHandle zh, zebra_rec_keys_t reckeys, zebra_snippets *snippets) @@ -1059,8 +1417,7 @@ ZEBRA_RES zebra_rec_keys_to_snippets(ZebraHandle zh, struct it_key key; while (zebra_rec_keys_read(reckeys, &str, &slen, &key)) { - char dst_buf[IT_MAX_WORD]; - char *dst_term = dst_buf; + char *dst_term = 0; int ord; zint seqno; int index_type; @@ -1074,7 +1431,7 @@ ZEBRA_RES zebra_rec_keys_to_snippets(ZebraHandle zh, assert(index_type); zebra_term_untrans_iconv(zh, nmem, index_type, &dst_term, str); - zebra_snippets_append(snippets, seqno, ord, dst_term); + zebra_snippets_append(snippets, seqno, 0, ord, dst_term); nmem_reset(nmem); } } @@ -1154,6 +1511,20 @@ static void extract_add_sort_string(RecWord *p, const char *str, int length) zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key); } +static void extract_add_staticrank_string(RecWord *p, + const char *str, int length) +{ + char valz[40]; + struct recExtractCtrl *ctrl = p->extractCtrl; + + if (length > sizeof(valz)-1) + length = sizeof(valz)-1; + + memcpy(valz, str, length); + valz[length] = '\0'; + ctrl->staticrank = atozint(valz); +} + static void extract_add_string(RecWord *p, const char *string, int length) { ZebraHandle zh = p->extractCtrl->handle; @@ -1162,9 +1533,7 @@ static void extract_add_string(RecWord *p, const char *string, int length) if (!p->index_name) return; - if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type)) - extract_add_sort_string(p, string, length); - else + if (zebra_maps_is_index(zh->reg->zebra_maps, p->index_type)) { extract_add_index_string(p, zinfo_index_category_index, string, length); @@ -1178,6 +1547,14 @@ static void extract_add_string(RecWord *p, const char *string, int length) &word, zinfo_index_category_alwaysmatches, "", 0); } } + else if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type)) + { + extract_add_sort_string(p, string, length); + } + else if (zebra_maps_is_staticrank(zh->reg->zebra_maps, p->index_type)) + { + extract_add_staticrank_string(p, string, length); + } } static void extract_add_incomplete_field(RecWord *p) @@ -1185,20 +1562,12 @@ static void extract_add_incomplete_field(RecWord *p) ZebraHandle zh = p->extractCtrl->handle; const char *b = p->term_buf; int remain = p->term_len; + int first = 1; const char **map = 0; if (remain > 0) map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0); - if (map) - { - if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type)) - { - /* first in field marker */ - extract_add_string(p, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN); - p->seqno++; - } - } while (map) { char buf[IT_MAX_WORD+1]; @@ -1231,6 +1600,17 @@ static void extract_add_incomplete_field(RecWord *p) } if (!i) return; + + if (first) + { + first = 0; + if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type)) + { + /* first in field marker */ + extract_add_string(p, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN); + p->seqno++; + } + } extract_add_string (p, buf, i); p->seqno++; } @@ -1301,9 +1681,9 @@ static void extract_token_add(RecWord *p) ZebraHandle zh = p->extractCtrl->handle; WRBUF wrbuf; - if (log_level_extract) + if (log_level_details) { - yaz_log(log_level_extract, "extract_token_add " + yaz_log(log_level_details, "extract_token_add " "type=%c index=%s seqno=" ZINT_FORMAT " s=%.*s", p->index_type, p->index_name, p->seqno, p->term_len, p->term_buf); @@ -1354,24 +1734,30 @@ static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid) void extract_flush_sort_keys(ZebraHandle zh, zint sysno, int cmd, zebra_rec_keys_t reckeys) { +#if 0 + yaz_log(YLOG_LOG, "extract_flush_sort_keys cmd=%d sysno=" ZINT_FORMAT, + cmd, sysno); + extract_rec_keys_log(zh, cmd, reckeys, YLOG_LOG); +#endif + if (zebra_rec_keys_rewind(reckeys)) { - SortIdx sortIdx = zh->reg->sortIdx; + zebra_sort_index_t si = zh->reg->sort_index; size_t slen; const char *str; struct it_key key_in; - sortIdx_sysno (sortIdx, sysno); + zebra_sort_sysno(si, sysno); while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) { int ord = CAST_ZINT_TO_INT(key_in.mem[0]); - sortIdx_type(sortIdx, ord); + zebra_sort_type(si, ord); if (cmd == 1) - sortIdx_add(sortIdx, str, slen); + zebra_sort_add(si, str, slen); else - sortIdx_add(sortIdx, "", 1); + zebra_sort_delete(si); } } }