X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=c490210384b22b5f9c3701303f28c94c15c81734;hb=58b1837132acdc0f4683fef187b6a5b06534fa85;hp=3874dfe317fd3a1aab73366883d9b3f1640c2036;hpb=852d5f1f9aa0a70f7e54a68143ee86752394a2f2;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 3874dfe..c490210 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.228 2006-08-22 13:39:27 adam Exp $ +/* $Id: extract.c,v 1.235 2006-11-09 14:39:24 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -43,15 +43,18 @@ struct encode_info { char buf[ENCODE_BUFLEN]; }; -static int log_level = 0; -static int log_level_initialized = 1; +static int log_level_extract = 0; +static int log_level_details = 0; +static int log_level_initialized = 0; -static void zebra_init_log_level() +static void zebra_init_log_level(void) { if (!log_level_initialized) { - log_level = yaz_log_module_level("extract"); log_level_initialized = 1; + + log_level_extract = yaz_log_module_level("extract"); + log_level_details = yaz_log_module_level("indexdetails"); } } @@ -365,7 +368,7 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, else sprintf (gprefix, "%s.", zh->m_group); - yaz_log(log_level, "zebra_extract_file %s", fname); + yaz_log(log_level_extract, "zebra_extract_file %s", fname); /* determine file extension */ *ext = '\0'; @@ -491,7 +494,8 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, if (recordType && *recordType) { - yaz_log(log_level, "Record type explicitly specified: %s", recordType); + yaz_log(log_level_extract, + "Record type explicitly specified: %s", recordType); recType = recType_byName (zh->reg->recTypes, zh->res, recordType, &clientData); } @@ -502,7 +506,7 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, yaz_log (YLOG_WARN, "No such record type defined"); return ZEBRA_FAIL; } - yaz_log(log_level, "Get record type from rgroup: %s", + yaz_log(log_level_extract, "Get record type from rgroup: %s", zh->m_record_type); recType = recType_byName (zh->reg->recTypes, zh->res, zh->m_record_type, &clientData); @@ -742,7 +746,7 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, } rec_del (zh->reg->records, &rec); } - rec_rm (&rec); + rec_free(&rec); logRecord(zh); return ZEBRA_OK; } @@ -921,6 +925,70 @@ ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n) return ZEBRA_OK; } +void extract_rec_keys_log(ZebraHandle zh, int is_insert, + zebra_rec_keys_t reckeys, + int level) +{ + if (zebra_rec_keys_rewind(reckeys)) + { + size_t slen; + const char *str; + struct it_key key; + NMEM nmem = nmem_create(); + + while(zebra_rec_keys_read(reckeys, &str, &slen, &key)) + { + char keystr[200]; /* room for zints to print */ + char *dst_term = 0; + int ord = CAST_ZINT_TO_INT(key.mem[0]); + int index_type, i; + const char *string_index; + + zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, + 0/* db */, &string_index); + assert(index_type); + zebra_term_untrans_iconv(zh, nmem, index_type, + &dst_term, str); + *keystr = '\0'; + for (i = 0; ireg->key_buf) { int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8")); @@ -1083,14 +1158,14 @@ void extract_flushWriteKeys (ZebraHandle zh, int final) #endif if (!zh->reg->key_buf || ptr_i <= 0) { - yaz_log(log_level, " nothing to flush section=%d buf=%p i=%d", + yaz_log(log_level_extract, " nothing to flush section=%d buf=%p i=%d", zh->reg->key_file_no, zh->reg->key_buf, ptr_i); return; } (zh->reg->key_file_no)++; yaz_log (YLOG_LOG, "sorting section %d", (zh->reg->key_file_no)); - yaz_log(log_level, " sort_buff at %p n=%d", + yaz_log(log_level_extract, " sort_buff at %p n=%d", zh->reg->key_buf + zh->reg->ptr_top - ptr_i,ptr_i); #if !SORT_EXTRA qsort (zh->reg->key_buf + zh->reg->ptr_top - ptr_i, ptr_i, @@ -1196,9 +1271,9 @@ void extract_flushWriteKeys (ZebraHandle zh, int final) zh->reg->key_buf_used = 0; } -ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, - zebra_rec_keys_t reckeys, - zebra_snippets *snippets) +ZEBRA_RES zebra_rec_keys_to_snippets(ZebraHandle zh, + zebra_rec_keys_t reckeys, + zebra_snippets *snippets) { NMEM nmem = nmem_create(); if (zebra_rec_keys_rewind(reckeys)) @@ -1329,7 +1404,7 @@ static void extract_add_string(RecWord *p, const char *string, int length) } } -static void extract_add_incomplete_field (RecWord *p) +static void extract_add_incomplete_field(RecWord *p) { ZebraHandle zh = p->extractCtrl->handle; const char *b = p->term_buf; @@ -1339,6 +1414,15 @@ static void extract_add_incomplete_field (RecWord *p) if (remain > 0) map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0); + if (map) + { + if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type)) + { + /* first in field marker */ + extract_add_string(p, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN); + p->seqno++; + } + } while (map) { char buf[IT_MAX_WORD+1]; @@ -1440,11 +1524,14 @@ static void extract_token_add(RecWord *p) { ZebraHandle zh = p->extractCtrl->handle; WRBUF wrbuf; - if (log_level) - yaz_log(log_level, "extract_token_add " + + if (log_level_extract) + { + yaz_log(log_level_extract, "extract_token_add " "type=%c index=%s seqno=" ZINT_FORMAT " s=%.*s", p->index_type, p->index_name, p->seqno, p->term_len, p->term_buf); + } if ((wrbuf = zebra_replace(zh->reg->zebra_maps, p->index_type, 0, p->term_buf, p->term_len))) {