X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=33ee2e5017f9b6142d21526522c43761d52ada91;hb=8d54ef98fc7b1e31c4b4aa0ad21980bdb14d739c;hp=f7dbde39e879c6c9afac77c2a3a0551278247129;hpb=1e6d660cabb2425e31552deb2161b72435b27d5b;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index f7dbde3..33ee2e5 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.232 2006-10-29 20:35:58 adam Exp $ +/* $Id: extract.c,v 1.238 2006-11-20 13:59:13 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -43,15 +43,18 @@ struct encode_info { char buf[ENCODE_BUFLEN]; }; -static int log_level = 0; +static int log_level_extract = 0; +static int log_level_details = 0; static int log_level_initialized = 0; static void zebra_init_log_level(void) { if (!log_level_initialized) { - log_level = yaz_log_module_level("extract"); log_level_initialized = 1; + + log_level_extract = yaz_log_module_level("extract"); + log_level_details = yaz_log_module_level("indexdetails"); } } @@ -365,7 +368,7 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, else sprintf (gprefix, "%s.", zh->m_group); - yaz_log(log_level, "zebra_extract_file %s", fname); + yaz_log(log_level_extract, "zebra_extract_file %s", fname); /* determine file extension */ *ext = '\0'; @@ -441,8 +444,6 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, } while(1) { - off_t prev_off = streamp->tellf(streamp); - r = zebra_extract_record_stream(zh, streamp, deleteFlag, 0, /* tst_mode */ @@ -461,8 +462,6 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, { break; } - if (prev_off == streamp->tellf(streamp)) - break; } if (streamp) stream.destroy(streamp); @@ -495,7 +494,8 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, if (recordType && *recordType) { - yaz_log(log_level, "Record type explicitly specified: %s", recordType); + yaz_log(log_level_extract, + "Record type explicitly specified: %s", recordType); recType = recType_byName (zh->reg->recTypes, zh->res, recordType, &clientData); } @@ -506,7 +506,7 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, yaz_log (YLOG_WARN, "No such record type defined"); return ZEBRA_FAIL; } - yaz_log(log_level, "Get record type from rgroup: %s", + yaz_log(log_level_extract, "Get record type from rgroup: %s", zh->m_record_type); recType = recType_byName (zh->reg->recTypes, zh->res, zh->m_record_type, &clientData); @@ -824,6 +824,7 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, rec->size[recInfo_storeData] = zh->store_data_size; rec->info[recInfo_storeData] = zh->store_data_buf; zh->store_data_buf = 0; + recordAttr->recordSize = zh->store_data_size; } else if (zh->m_store_data) { @@ -925,6 +926,70 @@ ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n) return ZEBRA_OK; } +void extract_rec_keys_log(ZebraHandle zh, int is_insert, + zebra_rec_keys_t reckeys, + int level) +{ + if (zebra_rec_keys_rewind(reckeys)) + { + size_t slen; + const char *str; + struct it_key key; + NMEM nmem = nmem_create(); + + while(zebra_rec_keys_read(reckeys, &str, &slen, &key)) + { + char keystr[200]; /* room for zints to print */ + char *dst_term = 0; + int ord = CAST_ZINT_TO_INT(key.mem[0]); + int index_type, i; + const char *string_index; + + zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, + 0/* db */, &string_index); + assert(index_type); + zebra_term_untrans_iconv(zh, nmem, index_type, + &dst_term, str); + *keystr = '\0'; + for (i = 0; ireg->key_buf) { int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8")); @@ -1082,24 +1154,26 @@ void extract_flushWriteKeys (ZebraHandle zh, int final) struct encode_info encode_info; int ptr_i = zh->reg->ptr_i; int temp_policy; -#if SORT_EXTRA - int i; -#endif if (!zh->reg->key_buf || ptr_i <= 0) { - yaz_log(log_level, " nothing to flush section=%d buf=%p i=%d", + yaz_log(log_level_extract, " nothing to flush section=%d buf=%p i=%d", zh->reg->key_file_no, zh->reg->key_buf, ptr_i); return; } (zh->reg->key_file_no)++; yaz_log (YLOG_LOG, "sorting section %d", (zh->reg->key_file_no)); - yaz_log(log_level, " sort_buff at %p n=%d", + yaz_log(log_level_extract, " sort_buff at %p n=%d", zh->reg->key_buf + zh->reg->ptr_top - ptr_i,ptr_i); -#if !SORT_EXTRA - qsort (zh->reg->key_buf + zh->reg->ptr_top - ptr_i, ptr_i, - sizeof(char*), key_qsort_compare); + +#if USE_SHELLSORT + shellsort(zh->reg->key_buf + zh->reg->ptr_top - ptr_i, ptr_i, + sizeof(char*), key_qsort_compare); +#else + qsort(zh->reg->key_buf + zh->reg->ptr_top - ptr_i, ptr_i, + sizeof(char*), key_qsort_compare); +#endif /* zebra.cfg: tempfiles: Y: always use temp files (old way) A: use temp files, if more than one (auto) @@ -1131,7 +1205,7 @@ void extract_flushWriteKeys (ZebraHandle zh, int final) if (!(outf = fopen (out_fname, "wb"))) { yaz_log (YLOG_FATAL|YLOG_ERRNO, "fopen %s", out_fname); - exit (1); + zebra_exit("extract_flushWriteKeys"); } yaz_log (YLOG_LOG, "writing section %d", zh->reg->key_file_no); prevcp = cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i]; @@ -1153,56 +1227,19 @@ void extract_flushWriteKeys (ZebraHandle zh, int final) encode_key_write (cp + strlen(cp), &encode_info, outf); } encode_key_flush ( &encode_info, outf); -#else - qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_x_compare); - extract_get_fname_tmp (out_fname, key_file_no); - - if (!(outf = fopen (out_fname, "wb"))) - { - yaz_log (YLOG_FATAL|YLOG_ERRNO, "fopen %s", out_fname); - exit (1); - } - yaz_log (YLOG_LOG, "writing section %d", key_file_no); - i = ptr_i; - prevcp = key_buf[ptr_top-i]; - while (1) - if (!--i || strcmp (prevcp, key_buf[ptr_top-i])) - { - key_y_len = strlen(prevcp)+1; -#if 0 - yaz_log (YLOG_LOG, "key_y_len: %2d %02x %02x %s", - key_y_len, prevcp[0], prevcp[1], 2+prevcp); -#endif - qsort (key_buf + ptr_top-ptr_i, ptr_i - i, - sizeof(char*), key_y_compare); - cp = key_buf[ptr_top-ptr_i]; - --key_y_len; - encode_key_init (&encode_info); - encode_key_write (cp, &encode_info, outf); - while (--ptr_i > i) - { - cp = key_buf[ptr_top-ptr_i]; - encode_key_write (cp+key_y_len, &encode_info, outf); - } - encode_key_flush ( &encode_info, outf); - if (!i) - break; - prevcp = key_buf[ptr_top-ptr_i]; - } -#endif if (fclose (outf)) { yaz_log (YLOG_FATAL|YLOG_ERRNO, "fclose %s", out_fname); - exit (1); + zebra_exit("extract_flushWriteKeys"); } yaz_log (YLOG_LOG, "finished section %d", zh->reg->key_file_no); zh->reg->ptr_i = 0; zh->reg->key_buf_used = 0; } -ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, - zebra_rec_keys_t reckeys, - zebra_snippets *snippets) +ZEBRA_RES zebra_rec_keys_to_snippets(ZebraHandle zh, + zebra_rec_keys_t reckeys, + zebra_snippets *snippets) { NMEM nmem = nmem_create(); if (zebra_rec_keys_rewind(reckeys)) @@ -1454,9 +1491,9 @@ static void extract_token_add(RecWord *p) ZebraHandle zh = p->extractCtrl->handle; WRBUF wrbuf; - if (log_level) + if (log_level_extract) { - yaz_log(log_level, "extract_token_add " + yaz_log(log_level_extract, "extract_token_add " "type=%c index=%s seqno=" ZINT_FORMAT " s=%.*s", p->index_type, p->index_name, p->seqno, p->term_len, p->term_buf); @@ -1560,7 +1597,7 @@ static void encode_key_write (char *k, struct encode_info *i, FILE *outf) if (fwrite (i->buf, bp - i->buf, 1, outf) != 1) { yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite"); - exit (1); + zebra_exit("encode_key_write"); } #if 0