X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fextract.c;h=614c0f16e51e96563504dd5c0fb140a522f17784;hp=3874dfe317fd3a1aab73366883d9b3f1640c2036;hb=161aa7805930f6b91ccea027e4afa4ccce41c379;hpb=852d5f1f9aa0a70f7e54a68143ee86752394a2f2 diff --git a/index/extract.c b/index/extract.c index 3874dfe..614c0f1 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.228 2006-08-22 13:39:27 adam Exp $ +/* $Id: extract.c,v 1.246 2006-12-18 23:40:07 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -36,64 +36,29 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include -#define ENCODE_BUFLEN 768 -struct encode_info { - void *encode_handle; - void *decode_handle; - char buf[ENCODE_BUFLEN]; -}; - -static int log_level = 0; -static int log_level_initialized = 1; +static int log_level_extract = 0; +static int log_level_details = 0; +static int log_level_initialized = 0; -static void zebra_init_log_level() +static void zebra_init_log_level(void) { if (!log_level_initialized) { - log_level = yaz_log_module_level("extract"); log_level_initialized = 1; + + log_level_extract = yaz_log_module_level("extract"); + log_level_details = yaz_log_module_level("indexdetails"); } } -static void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, - int cmd, zebra_rec_keys_t reckeys, - zint staticrank); -static void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, - int cmd, zebra_rec_keys_t skp); +static void extract_flush_record_keys(ZebraHandle zh, zint sysno, + int cmd, zebra_rec_keys_t reckeys, + zint staticrank); +static void extract_flush_sort_keys(ZebraHandle zh, zint sysno, + int cmd, zebra_rec_keys_t skp); static void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid); static void extract_token_add (RecWord *p); -static void encode_key_init (struct encode_info *i); -static void encode_key_write (char *k, struct encode_info *i, FILE *outf); -static void encode_key_flush (struct encode_info *i, FILE *outf); - -#define USE_SHELLSORT 0 - -#if USE_SHELLSORT -static void shellsort(void *ar, int r, size_t s, - int (*cmp)(const void *a, const void *b)) -{ - char *a = ar; - char v[100]; - int h, i, j, k; - static const int incs[16] = { 1391376, 463792, 198768, 86961, 33936, - 13776, 4592, 1968, 861, 336, - 112, 48, 21, 7, 3, 1 }; - for ( k = 0; k < 16; k++) - for (h = incs[k], i = h; i < r; i++) - { - memcpy (v, a+s*i, s); - j = i; - while (j > h && (*cmp)(a + s*(j-h), v) > 0) - { - memcpy (a + s*j, a + s*(j-h), s); - j -= h; - } - memcpy (a+s*j, v, s); - } -} -#endif - static void logRecord (ZebraHandle zh) { ++zh->records_processed; @@ -174,9 +139,9 @@ static void searchRecordKey(ZebraHandle zh, #define FILE_MATCH_BLANK "\t " -static char *fileMatchStr (ZebraHandle zh, - zebra_rec_keys_t reckeys, - const char *fname, const char *spec) +static char *get_match_from_spec(ZebraHandle zh, + zebra_rec_keys_t reckeys, + const char *fname, const char *spec) { static char dstBuf[2048]; /* static here ??? */ char *dst = dstBuf; @@ -344,7 +309,7 @@ static void all_matches_add(struct recExtractCtrl *ctrl) "", 0); } -ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, +ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, int deleteFlag) { ZEBRA_RES r = ZEBRA_OK; @@ -365,7 +330,7 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, else sprintf (gprefix, "%s.", zh->m_group); - yaz_log(log_level, "zebra_extract_file %s", fname); + yaz_log(log_level_extract, "zebra_extract_file %s", fname); /* determine file extension */ *ext = '\0'; @@ -441,6 +406,7 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, } while(1) { + int more = 0; r = zebra_extract_record_stream(zh, streamp, deleteFlag, 0, /* tst_mode */ @@ -450,11 +416,9 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, fname, 1, /* force_update */ 1, /* allow_update */ - recType, recTypeClientData); - if (r != ZEBRA_OK) - { - break; - } + recType, recTypeClientData, &more); + if (!more) + break; if (sysno) { break; @@ -478,7 +442,7 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, int delete_flag, int test_mode, const char *recordType, - SYSNO *sysno, + zint *sysno, const char *match_criteria, const char *fname, int force_update, @@ -488,10 +452,12 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, ZEBRA_RES res; void *clientData; RecType recType = 0; + int more = 0; if (recordType && *recordType) { - yaz_log(log_level, "Record type explicitly specified: %s", recordType); + yaz_log(log_level_extract, + "Record type explicitly specified: %s", recordType); recType = recType_byName (zh->reg->recTypes, zh->res, recordType, &clientData); } @@ -502,7 +468,7 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, yaz_log (YLOG_WARN, "No such record type defined"); return ZEBRA_FAIL; } - yaz_log(log_level, "Get record type from rgroup: %s", + yaz_log(log_level_extract, "Get record type from rgroup: %s", zh->m_record_type); recType = recType_byName (zh->reg->recTypes, zh->res, zh->m_record_type, &clientData); @@ -515,8 +481,6 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, return ZEBRA_FAIL; } - - zebra_create_stream_mem(&stream, buf, buf_size); res = zebra_extract_record_stream(zh, &stream, @@ -528,7 +492,7 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, fname, force_update, allow_update, - recType, clientData); + recType, clientData, &more); stream.destroy(&stream); return res; } @@ -539,22 +503,23 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, int delete_flag, int test_mode, const char *recordType, - SYSNO *sysno, + zint *sysno, const char *match_criteria, const char *fname, int force_update, int allow_update, RecType recType, - void *recTypeClientData) + void *recTypeClientData, + int *more) { - SYSNO sysno0 = 0; + zint sysno0 = 0; RecordAttr *recordAttr; struct recExtractCtrl extractCtrl; int r; const char *matchStr = 0; Record rec; - off_t start_offset = 0; + off_t start_offset = 0, end_offset = 0; const char *pr_fname = fname; /* filename to print .. */ int show_progress = zh->records_processed < zh->m_file_verbose_limit ? 1:0; @@ -592,7 +557,6 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, extractCtrl.match_criteria[0] = '\0'; extractCtrl.staticrank = 0; - init_extractCtrl(zh, &extractCtrl); extract_set_store_data_prepare(&extractCtrl); @@ -618,20 +582,33 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, if (extractCtrl.match_criteria[0]) match_criteria = extractCtrl.match_criteria; + + + end_offset = stream->endf(stream, 0); + + if (!end_offset) + end_offset = stream->tellf(stream); + else + stream->seekf(stream, end_offset); + } - if (!sysno) { + + *more = 1; + if (!sysno) + { sysno = &sysno0; if (match_criteria && *match_criteria) { matchStr = match_criteria; } else { if (zh->m_record_id && *zh->m_record_id) { - matchStr = fileMatchStr (zh, zh->reg->keys, pr_fname, - zh->m_record_id); + matchStr = get_match_from_spec(zh, zh->reg->keys, pr_fname, + zh->m_record_id); if (!matchStr) { - yaz_log (YLOG_WARN, "Bad match criteria (recordID)"); + yaz_log (YLOG_LOG, "error %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) start_offset); return ZEBRA_FAIL; } } @@ -674,6 +651,11 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, *sysno = rec->sysno; recordAttr = rec_init_attr (zh->reg->zei, rec); + if (extractCtrl.staticrank < 0) + { + yaz_log(YLOG_WARN, "Negative staticrank for record. Set to 0"); + extractCtrl.staticrank = 0; + } recordAttr->staticrank = extractCtrl.staticrank; if (matchStr) @@ -683,10 +665,9 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, sizeof(*sysno), sysno); } - - extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); - extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, - recordAttr->staticrank); + extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys); + extract_flush_record_keys(zh, *sysno, 1, zh->reg->keys, + recordAttr->staticrank); zh->records_inserted++; } else @@ -707,6 +688,10 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, recordAttr = rec_init_attr (zh->reg->zei, rec); + /* decrease total size */ + zebraExplain_recordBytesIncrement (zh->reg->zei, + - recordAttr->recordSize); + zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], @@ -716,9 +701,9 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, rec->size[recInfo_sortKeys], 0); - extract_flushSortKeys (zh, *sysno, 0, sortKeys); - extract_flushRecordKeys (zh, *sysno, 0, delkeys, - recordAttr->staticrank); + extract_flush_sort_keys(zh, *sysno, 0, sortKeys); + extract_flush_record_keys(zh, *sysno, 0, delkeys, + recordAttr->staticrank); if (delete_flag) { /* record going to be deleted */ @@ -742,19 +727,19 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, } rec_del (zh->reg->records, &rec); } - rec_rm (&rec); + rec_free(&rec); logRecord(zh); return ZEBRA_OK; } else { if (show_progress) - yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType, - pr_fname, (zint) ZINT_FORMAT); + yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) start_offset); recordAttr->staticrank = extractCtrl.staticrank; - extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); - extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, - recordAttr->staticrank); + extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys); + extract_flush_record_keys(zh, *sysno, 1, zh->reg->keys, + recordAttr->staticrank); zh->records_updated++; } zebra_rec_keys_close(delkeys); @@ -790,18 +775,8 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, &rec->info[recInfo_sortKeys], &rec->size[recInfo_sortKeys]); - /* save file size of original record */ - zebraExplain_recordBytesIncrement (zh->reg->zei, - - recordAttr->recordSize); if (stream) { - off_t end_offset = stream->endf(stream, 0); - - if (!end_offset) - end_offset = stream->tellf(stream); - else - stream->seekf(stream, end_offset); - recordAttr->recordSize = end_offset - start_offset; zebraExplain_recordBytesIncrement(zh->reg->zei, recordAttr->recordSize); @@ -820,6 +795,7 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, rec->size[recInfo_storeData] = zh->store_data_size; rec->info[recInfo_storeData] = zh->store_data_buf; zh->store_data_buf = 0; + recordAttr->recordSize = zh->store_data_size; } else if (zh->m_store_data) { @@ -896,19 +872,19 @@ ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n) zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], 0); - extract_flushRecordKeys (zh, rec->sysno, 0, delkeys, 0); + extract_flush_record_keys(zh, rec->sysno, 0, delkeys, 0); zebra_rec_keys_close(delkeys); zebra_rec_keys_set_buf(sortkeys, rec->info[recInfo_sortKeys], rec->size[recInfo_sortKeys], 0); - extract_flushSortKeys (zh, rec->sysno, 0, sortkeys); + extract_flush_sort_keys(zh, rec->sysno, 0, sortkeys); zebra_rec_keys_close(sortkeys); } - extract_flushRecordKeys (zh, rec->sysno, 1, zh->reg->keys, 0); - extract_flushSortKeys (zh, rec->sysno, 1, zh->reg->sortKeys); - + extract_flush_record_keys(zh, rec->sysno, 1, zh->reg->keys, 0); + extract_flush_sort_keys(zh, rec->sysno, 1, zh->reg->sortKeys); + xfree (rec->info[recInfo_delKeys]); zebra_rec_keys_get_buf(zh->reg->keys, &rec->info[recInfo_delKeys], @@ -921,6 +897,70 @@ ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n) return ZEBRA_OK; } +void extract_rec_keys_log(ZebraHandle zh, int is_insert, + zebra_rec_keys_t reckeys, + int level) +{ + if (zebra_rec_keys_rewind(reckeys)) + { + size_t slen; + const char *str; + struct it_key key; + NMEM nmem = nmem_create(); + + while(zebra_rec_keys_read(reckeys, &str, &slen, &key)) + { + char keystr[200]; /* room for zints to print */ + char *dst_term = 0; + int ord = CAST_ZINT_TO_INT(key.mem[0]); + int index_type, i; + const char *string_index; + + zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, + 0/* db */, &string_index); + assert(index_type); + zebra_term_untrans_iconv(zh, nmem, index_type, + &dst_term, str); + *keystr = '\0'; + for (i = 0; ireg->zei; extract_rec_keys_adjust(zh, cmd, reckeys); - if (!zh->reg->key_buf) + if (log_level_details) { - int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8")); - if (mem <= 0) - { - yaz_log(YLOG_WARN, "Invalid memory setting, using default 8 MB"); - mem= 1024*1024*8; - } - /* FIXME: That "8" should be in a default settings include */ - /* not hard-coded here! -H */ - zh->reg->key_buf = (char**) xmalloc (mem); - zh->reg->ptr_top = mem/sizeof(char*); - zh->reg->ptr_i = 0; - zh->reg->key_buf_used = 0; - zh->reg->key_file_no = 0; + yaz_log(log_level_details, "Keys for record " ZINT_FORMAT " %s", + sysno, cmd ? "insert" : "delete"); + extract_rec_keys_log(zh, cmd, reckeys, log_level_details); + } + + if (!zh->reg->key_block) + { + int mem = 1024*1024 * atoi( res_get_def( zh->res, "memmax", "8")); + const char *key_tmp_dir = res_get_def (zh->res, "keyTmpDir", "."); + int use_threads = atoi(res_get_def (zh->res, "threads", "1")); + zh->reg->key_block = key_block_create(mem, key_tmp_dir, use_threads); } zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1); @@ -1007,198 +1044,17 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, struct it_key key_in; while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) { - int ch = 0; - int i, j = 0; - struct it_key key_out; - - assert(key_in.len >= 2); - assert(key_in.len <= IT_KEY_LEVEL_MAX); - - /* check for buffer overflow */ - if (zh->reg->key_buf_used + 1024 > - (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*)) - extract_flushWriteKeys (zh, 0); - - ++(zh->reg->ptr_i); - assert(zh->reg->ptr_i > 0); - (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] = - (char*)zh->reg->key_buf + zh->reg->key_buf_used; - - /* key_in.mem[0] ord/ch */ - /* key_in.mem[1] filter specified record ID */ - - /* encode the ordinal value (field/use/attribute) .. */ - ch = CAST_ZINT_TO_INT(key_in.mem[0]); - zh->reg->key_buf_used += - key_SU_encode(ch, (char*)zh->reg->key_buf + - zh->reg->key_buf_used); - - /* copy the 0-terminated stuff from str to output */ - memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, str, slen); - zh->reg->key_buf_used += slen; - ((char*)zh->reg->key_buf)[(zh->reg->key_buf_used)++] = '\0'; - - /* the delete/insert indicator */ - ((char*)zh->reg->key_buf)[(zh->reg->key_buf_used)++] = cmd; - - if (zh->m_staticrank) /* rank config enabled ? */ - { - if (staticrank < 0) - { - yaz_log(YLOG_WARN, "staticrank = %ld. Setting to 0", - (long) staticrank); - staticrank = 0; - } - key_out.mem[j++] = staticrank; - } - - if (key_in.mem[1]) /* filter specified record ID */ - key_out.mem[j++] = key_in.mem[1]; - else - key_out.mem[j++] = sysno; - for (i = 2; i < key_in.len; i++) - key_out.mem[j++] = key_in.mem[i]; - key_out.len = j; - - memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, - &key_out, sizeof(key_out)); - (zh->reg->key_buf_used) += sizeof(key_out); + key_block_write(zh->reg->key_block, sysno, + &key_in, cmd, str, slen, + staticrank, zh->m_staticrank); } } } -void extract_flushWriteKeys (ZebraHandle zh, int final) - /* optimizing: if final=1, and no files written yet */ - /* push the keys directly to merge, sidestepping the */ - /* temp file altogether. Speeds small updates */ -{ - FILE *outf; - char out_fname[200]; - char *prevcp, *cp; - struct encode_info encode_info; - int ptr_i = zh->reg->ptr_i; - int temp_policy; -#if SORT_EXTRA - int i; -#endif - if (!zh->reg->key_buf || ptr_i <= 0) - { - yaz_log(log_level, " nothing to flush section=%d buf=%p i=%d", - zh->reg->key_file_no, zh->reg->key_buf, ptr_i); - return; - } - - (zh->reg->key_file_no)++; - yaz_log (YLOG_LOG, "sorting section %d", (zh->reg->key_file_no)); - yaz_log(log_level, " sort_buff at %p n=%d", - zh->reg->key_buf + zh->reg->ptr_top - ptr_i,ptr_i); -#if !SORT_EXTRA - qsort (zh->reg->key_buf + zh->reg->ptr_top - ptr_i, ptr_i, - sizeof(char*), key_qsort_compare); - - /* zebra.cfg: tempfiles: - Y: always use temp files (old way) - A: use temp files, if more than one (auto) - = if this is both the last and the first - N: never bother with temp files (new) */ - - temp_policy=toupper(res_get_def(zh->res,"tempfiles","auto")[0]); - if (temp_policy != 'Y' && temp_policy != 'N' && temp_policy != 'A') { - yaz_log (YLOG_WARN, "Illegal tempfiles setting '%c'. using 'Auto' ", - temp_policy); - temp_policy='A'; - } - - if ( ( temp_policy =='N' ) || /* always from memory */ - ( ( temp_policy =='A' ) && /* automatic */ - (zh->reg->key_file_no == 1) && /* this is first time */ - (final) ) ) /* and last (=only) time */ - { /* go directly from memory */ - zh->reg->key_file_no =0; /* signal not to read files */ - zebra_index_merge(zh); - zh->reg->ptr_i = 0; - zh->reg->key_buf_used = 0; - return; - } - - /* Not doing directly from memory, write into a temp file */ - extract_get_fname_tmp (zh, out_fname, zh->reg->key_file_no); - - if (!(outf = fopen (out_fname, "wb"))) - { - yaz_log (YLOG_FATAL|YLOG_ERRNO, "fopen %s", out_fname); - exit (1); - } - yaz_log (YLOG_LOG, "writing section %d", zh->reg->key_file_no); - prevcp = cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i]; - - encode_key_init (&encode_info); - encode_key_write (cp, &encode_info, outf); - - while (--ptr_i > 0) - { - cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i]; - if (strcmp (cp, prevcp)) - { - encode_key_flush ( &encode_info, outf); - encode_key_init (&encode_info); - encode_key_write (cp, &encode_info, outf); - prevcp = cp; - } - else - encode_key_write (cp + strlen(cp), &encode_info, outf); - } - encode_key_flush ( &encode_info, outf); -#else - qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_x_compare); - extract_get_fname_tmp (out_fname, key_file_no); - - if (!(outf = fopen (out_fname, "wb"))) - { - yaz_log (YLOG_FATAL|YLOG_ERRNO, "fopen %s", out_fname); - exit (1); - } - yaz_log (YLOG_LOG, "writing section %d", key_file_no); - i = ptr_i; - prevcp = key_buf[ptr_top-i]; - while (1) - if (!--i || strcmp (prevcp, key_buf[ptr_top-i])) - { - key_y_len = strlen(prevcp)+1; -#if 0 - yaz_log (YLOG_LOG, "key_y_len: %2d %02x %02x %s", - key_y_len, prevcp[0], prevcp[1], 2+prevcp); -#endif - qsort (key_buf + ptr_top-ptr_i, ptr_i - i, - sizeof(char*), key_y_compare); - cp = key_buf[ptr_top-ptr_i]; - --key_y_len; - encode_key_init (&encode_info); - encode_key_write (cp, &encode_info, outf); - while (--ptr_i > i) - { - cp = key_buf[ptr_top-ptr_i]; - encode_key_write (cp+key_y_len, &encode_info, outf); - } - encode_key_flush ( &encode_info, outf); - if (!i) - break; - prevcp = key_buf[ptr_top-ptr_i]; - } -#endif - if (fclose (outf)) - { - yaz_log (YLOG_FATAL|YLOG_ERRNO, "fclose %s", out_fname); - exit (1); - } - yaz_log (YLOG_LOG, "finished section %d", zh->reg->key_file_no); - zh->reg->ptr_i = 0; - zh->reg->key_buf_used = 0; -} -ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, - zebra_rec_keys_t reckeys, - zebra_snippets *snippets) +ZEBRA_RES zebra_rec_keys_to_snippets(ZebraHandle zh, + zebra_rec_keys_t reckeys, + zebra_snippets *snippets) { NMEM nmem = nmem_create(); if (zebra_rec_keys_rewind(reckeys)) @@ -1208,8 +1064,7 @@ ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, struct it_key key; while (zebra_rec_keys_read(reckeys, &str, &slen, &key)) { - char dst_buf[IT_MAX_WORD]; - char *dst_term = dst_buf; + char *dst_term = 0; int ord; zint seqno; int index_type; @@ -1329,11 +1184,12 @@ static void extract_add_string(RecWord *p, const char *string, int length) } } -static void extract_add_incomplete_field (RecWord *p) +static void extract_add_incomplete_field(RecWord *p) { ZebraHandle zh = p->extractCtrl->handle; const char *b = p->term_buf; int remain = p->term_len; + int first = 1; const char **map = 0; if (remain > 0) @@ -1371,6 +1227,17 @@ static void extract_add_incomplete_field (RecWord *p) } if (!i) return; + + if (first) + { + first = 0; + if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type)) + { + /* first in field marker */ + extract_add_string(p, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN); + p->seqno++; + } + } extract_add_string (p, buf, i); p->seqno++; } @@ -1440,11 +1307,14 @@ static void extract_token_add(RecWord *p) { ZebraHandle zh = p->extractCtrl->handle; WRBUF wrbuf; - if (log_level) - yaz_log(log_level, "extract_token_add " + + if (log_level_extract) + { + yaz_log(log_level_extract, "extract_token_add " "type=%c index=%s seqno=" ZINT_FORMAT " s=%.*s", p->index_type, p->index_name, p->seqno, p->term_len, p->term_buf); + } if ((wrbuf = zebra_replace(zh->reg->zebra_maps, p->index_type, 0, p->term_buf, p->term_len))) { @@ -1482,93 +1352,43 @@ static void extract_set_store_data_prepare(struct recExtractCtrl *p) p->setStoreData = extract_set_store_data_cb; } -static void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid) +static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid) { ZebraHandle zh = (ZebraHandle) p->handle; zebraExplain_addSchema (zh->reg->zei, oid); } -void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, - int cmd, zebra_rec_keys_t reckeys) +void extract_flush_sort_keys(ZebraHandle zh, zint sysno, + int cmd, zebra_rec_keys_t reckeys) { +#if 0 + yaz_log(YLOG_LOG, "extract_flush_sort_keys cmd=%d sysno=" ZINT_FORMAT, + cmd, sysno); + extract_rec_keys_log(zh, cmd, reckeys, YLOG_LOG); +#endif + if (zebra_rec_keys_rewind(reckeys)) { - SortIdx sortIdx = zh->reg->sortIdx; + zebra_sort_index_t si = zh->reg->sort_index; size_t slen; const char *str; struct it_key key_in; - sortIdx_sysno (sortIdx, sysno); + zebra_sort_sysno(si, sysno); while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) { int ord = CAST_ZINT_TO_INT(key_in.mem[0]); - sortIdx_type(sortIdx, ord); + zebra_sort_type(si, ord); if (cmd == 1) - sortIdx_add(sortIdx, str, slen); + zebra_sort_add(si, str, slen); else - sortIdx_add(sortIdx, "", 1); + zebra_sort_delete(si); } } } -static void encode_key_init(struct encode_info *i) -{ - i->encode_handle = iscz1_start(); - i->decode_handle = iscz1_start(); -} - -static void encode_key_write (char *k, struct encode_info *i, FILE *outf) -{ - struct it_key key; - char *bp = i->buf, *bp0; - const char *src = (char *) &key; - - /* copy term to output buf */ - while ((*bp++ = *k++)) - ; - /* and copy & align key so we can mangle */ - memcpy (&key, k+1, sizeof(struct it_key)); /* *k is insert/delete */ - -#if 0 - /* debugging */ - key_logdump_txt(YLOG_LOG, &key, *k ? "i" : "d"); -#endif - assert(key.mem[0] >= 0); - - bp0 = bp++; - iscz1_encode(i->encode_handle, &bp, &src); - - *bp0 = (*k * 128) + bp - bp0 - 1; /* length and insert/delete combined */ - if (fwrite (i->buf, bp - i->buf, 1, outf) != 1) - { - yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite"); - exit (1); - } - -#if 0 - /* debugging */ - if (1) - { - struct it_key key2; - const char *src = bp0+1; - char *dst = (char*) &key2; - iscz1_decode(i->decode_handle, &dst, &src); - - key_logdump_txt(YLOG_LOG, &key2, *k ? "i" : "d"); - - assert(key2.mem[1]); - } -#endif -} - -static void encode_key_flush (struct encode_info *i, FILE *outf) -{ - iscz1_stop(i->encode_handle); - iscz1_stop(i->decode_handle); -} - /* * Local variables: * c-basic-offset: 4