X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=903b7c5f2f4b1cf3eedaf8fcdbd001c8b93650ad;hb=6b553b7b42f0a2940c9765b9811e5db44ba8265f;hp=0a1d8c98d16812efc0a286f109f6b459fd7c85f5;hpb=ca39c711e9398043092cceeb3b4a7d8a0db1c0d8;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 0a1d8c9..903b7c5 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.239 2006-11-21 14:32:38 adam Exp $ +/* $Id: extract.c,v 1.244 2006-12-05 08:14:47 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -51,11 +51,11 @@ static void zebra_init_log_level(void) } } -static void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, - int cmd, zebra_rec_keys_t reckeys, - zint staticrank); -static void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, - int cmd, zebra_rec_keys_t skp); +static void extract_flush_record_keys(ZebraHandle zh, zint sysno, + int cmd, zebra_rec_keys_t reckeys, + zint staticrank); +static void extract_flush_sort_keys(ZebraHandle zh, zint sysno, + int cmd, zebra_rec_keys_t skp); static void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid); static void extract_token_add (RecWord *p); @@ -139,9 +139,9 @@ static void searchRecordKey(ZebraHandle zh, #define FILE_MATCH_BLANK "\t " -static char *fileMatchStr (ZebraHandle zh, - zebra_rec_keys_t reckeys, - const char *fname, const char *spec) +static char *get_match_from_spec(ZebraHandle zh, + zebra_rec_keys_t reckeys, + const char *fname, const char *spec) { static char dstBuf[2048]; /* static here ??? */ char *dst = dstBuf; @@ -309,7 +309,7 @@ static void all_matches_add(struct recExtractCtrl *ctrl) "", 0); } -ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, +ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, int deleteFlag) { ZEBRA_RES r = ZEBRA_OK; @@ -406,6 +406,7 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, } while(1) { + int more = 0; r = zebra_extract_record_stream(zh, streamp, deleteFlag, 0, /* tst_mode */ @@ -415,11 +416,9 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, SYSNO *sysno, const char *fname, fname, 1, /* force_update */ 1, /* allow_update */ - recType, recTypeClientData); - if (r != ZEBRA_OK) - { - break; - } + recType, recTypeClientData, &more); + if (!more) + break; if (sysno) { break; @@ -443,7 +442,7 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, int delete_flag, int test_mode, const char *recordType, - SYSNO *sysno, + zint *sysno, const char *match_criteria, const char *fname, int force_update, @@ -453,6 +452,7 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, ZEBRA_RES res; void *clientData; RecType recType = 0; + int more = 0; if (recordType && *recordType) { @@ -481,8 +481,6 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, return ZEBRA_FAIL; } - - zebra_create_stream_mem(&stream, buf, buf_size); res = zebra_extract_record_stream(zh, &stream, @@ -494,7 +492,7 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, fname, force_update, allow_update, - recType, clientData); + recType, clientData, &more); stream.destroy(&stream); return res; } @@ -505,22 +503,23 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, int delete_flag, int test_mode, const char *recordType, - SYSNO *sysno, + zint *sysno, const char *match_criteria, const char *fname, int force_update, int allow_update, RecType recType, - void *recTypeClientData) + void *recTypeClientData, + int *more) { - SYSNO sysno0 = 0; + zint sysno0 = 0; RecordAttr *recordAttr; struct recExtractCtrl extractCtrl; int r; const char *matchStr = 0; Record rec; - off_t start_offset = 0; + off_t start_offset = 0, end_offset = 0; const char *pr_fname = fname; /* filename to print .. */ int show_progress = zh->records_processed < zh->m_file_verbose_limit ? 1:0; @@ -558,7 +557,6 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, extractCtrl.match_criteria[0] = '\0'; extractCtrl.staticrank = 0; - init_extractCtrl(zh, &extractCtrl); extract_set_store_data_prepare(&extractCtrl); @@ -584,20 +582,33 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, if (extractCtrl.match_criteria[0]) match_criteria = extractCtrl.match_criteria; + + + end_offset = stream->endf(stream, 0); + + if (!end_offset) + end_offset = stream->tellf(stream); + else + stream->seekf(stream, end_offset); + } - if (!sysno) { + + *more = 1; + if (!sysno) + { sysno = &sysno0; if (match_criteria && *match_criteria) { matchStr = match_criteria; } else { if (zh->m_record_id && *zh->m_record_id) { - matchStr = fileMatchStr (zh, zh->reg->keys, pr_fname, - zh->m_record_id); + matchStr = get_match_from_spec(zh, zh->reg->keys, pr_fname, + zh->m_record_id); if (!matchStr) { - yaz_log (YLOG_WARN, "Bad match criteria (recordID)"); + yaz_log (YLOG_LOG, "error %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) start_offset); return ZEBRA_FAIL; } } @@ -640,6 +651,11 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, *sysno = rec->sysno; recordAttr = rec_init_attr (zh->reg->zei, rec); + if (extractCtrl.staticrank < 0) + { + yaz_log(YLOG_WARN, "Negative staticrank for record. Set to 0"); + extractCtrl.staticrank = 0; + } recordAttr->staticrank = extractCtrl.staticrank; if (matchStr) @@ -649,10 +665,9 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, sizeof(*sysno), sysno); } - - extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); - extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, - recordAttr->staticrank); + extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys); + extract_flush_record_keys(zh, *sysno, 1, zh->reg->keys, + recordAttr->staticrank); zh->records_inserted++; } else @@ -673,6 +688,10 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, recordAttr = rec_init_attr (zh->reg->zei, rec); + /* decrease total size */ + zebraExplain_recordBytesIncrement (zh->reg->zei, + - recordAttr->recordSize); + zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], @@ -682,9 +701,9 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, rec->size[recInfo_sortKeys], 0); - extract_flushSortKeys (zh, *sysno, 0, sortKeys); - extract_flushRecordKeys (zh, *sysno, 0, delkeys, - recordAttr->staticrank); + extract_flush_sort_keys(zh, *sysno, 0, sortKeys); + extract_flush_record_keys(zh, *sysno, 0, delkeys, + recordAttr->staticrank); if (delete_flag) { /* record going to be deleted */ @@ -715,12 +734,12 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, else { if (show_progress) - yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType, - pr_fname, (zint) ZINT_FORMAT); + yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) start_offset); recordAttr->staticrank = extractCtrl.staticrank; - extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); - extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, - recordAttr->staticrank); + extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys); + extract_flush_record_keys(zh, *sysno, 1, zh->reg->keys, + recordAttr->staticrank); zh->records_updated++; } zebra_rec_keys_close(delkeys); @@ -756,18 +775,8 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, &rec->info[recInfo_sortKeys], &rec->size[recInfo_sortKeys]); - /* save file size of original record */ - zebraExplain_recordBytesIncrement (zh->reg->zei, - - recordAttr->recordSize); if (stream) { - off_t end_offset = stream->endf(stream, 0); - - if (!end_offset) - end_offset = stream->tellf(stream); - else - stream->seekf(stream, end_offset); - recordAttr->recordSize = end_offset - start_offset; zebraExplain_recordBytesIncrement(zh->reg->zei, recordAttr->recordSize); @@ -863,19 +872,19 @@ ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n) zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], 0); - extract_flushRecordKeys (zh, rec->sysno, 0, delkeys, 0); + extract_flush_record_keys(zh, rec->sysno, 0, delkeys, 0); zebra_rec_keys_close(delkeys); zebra_rec_keys_set_buf(sortkeys, rec->info[recInfo_sortKeys], rec->size[recInfo_sortKeys], 0); - extract_flushSortKeys (zh, rec->sysno, 0, sortkeys); + extract_flush_sort_keys(zh, rec->sysno, 0, sortkeys); zebra_rec_keys_close(sortkeys); } - extract_flushRecordKeys (zh, rec->sysno, 1, zh->reg->keys, 0); - extract_flushSortKeys (zh, rec->sysno, 1, zh->reg->sortKeys); - + extract_flush_record_keys(zh, rec->sysno, 1, zh->reg->keys, 0); + extract_flush_sort_keys(zh, rec->sysno, 1, zh->reg->sortKeys); + xfree (rec->info[recInfo_delKeys]); zebra_rec_keys_get_buf(zh->reg->keys, &rec->info[recInfo_delKeys], @@ -1004,9 +1013,9 @@ void extract_rec_keys_adjust(ZebraHandle zh, int is_insert, } } -void extract_flushRecordKeys(ZebraHandle zh, SYSNO sysno, int cmd, - zebra_rec_keys_t reckeys, - zint staticrank) +void extract_flush_record_keys(ZebraHandle zh, zint sysno, int cmd, + zebra_rec_keys_t reckeys, + zint staticrank) { ZebraExplainInfo zei = zh->reg->zei; @@ -1021,9 +1030,10 @@ void extract_flushRecordKeys(ZebraHandle zh, SYSNO sysno, int cmd, if (!zh->reg->key_block) { - int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8")); + int mem = 1024*1024 * atoi( res_get_def( zh->res, "memmax", "8")); const char *key_tmp_dir = res_get_def (zh->res, "keyTmpDir", "."); - zh->reg->key_block = key_block_create(mem, key_tmp_dir); + int use_threads = atoi(res_get_def (zh->res, "threads", "1")); + zh->reg->key_block = key_block_create(mem, key_tmp_dir, use_threads); } zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1); @@ -1180,20 +1190,12 @@ static void extract_add_incomplete_field(RecWord *p) ZebraHandle zh = p->extractCtrl->handle; const char *b = p->term_buf; int remain = p->term_len; + int first = 1; const char **map = 0; if (remain > 0) map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0); - if (map) - { - if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type)) - { - /* first in field marker */ - extract_add_string(p, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN); - p->seqno++; - } - } while (map) { char buf[IT_MAX_WORD+1]; @@ -1226,6 +1228,17 @@ static void extract_add_incomplete_field(RecWord *p) } if (!i) return; + + if (first) + { + first = 0; + if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type)) + { + /* first in field marker */ + extract_add_string(p, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN); + p->seqno++; + } + } extract_add_string (p, buf, i); p->seqno++; } @@ -1346,8 +1359,8 @@ static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid) zebraExplain_addSchema (zh->reg->zei, oid); } -void extract_flushSortKeys(ZebraHandle zh, SYSNO sysno, - int cmd, zebra_rec_keys_t reckeys) +void extract_flush_sort_keys(ZebraHandle zh, zint sysno, + int cmd, zebra_rec_keys_t reckeys) { if (zebra_rec_keys_rewind(reckeys)) {