+ {
+ /* record already exists */
+ zebra_rec_keys_t delkeys = zebra_rec_keys_open();
+ zebra_rec_keys_t sortKeys = zebra_rec_keys_open();
+ if (action == action_insert)
+ {
+ yaz_log(YLOG_LOG, "skipped %s %s " ZINT_FORMAT,
+ recordType, pr_fname, (zint) start_offset);
+ logRecord(zh);
+ return ZEBRA_FAIL;
+ }
+
+ rec = rec_get(zh->reg->records, *sysno);
+ assert(rec);
+
+ if (stream)
+ {
+ all_matches_add(&extractCtrl,
+ zebra_rec_keys_get_custom_record_id(zh->reg->keys),
+ *sysno);
+ }
+
+ recordAttr = rec_init_attr(zh->reg->zei, rec);
+
+ /* decrease total size */
+ zebraExplain_recordBytesIncrement(zh->reg->zei,
+ - recordAttr->recordSize);
+
+ zebra_rec_keys_set_buf(delkeys,
+ rec->info[recInfo_delKeys],
+ rec->size[recInfo_delKeys],
+ 0);
+ zebra_rec_keys_set_buf(sortKeys,
+ rec->info[recInfo_sortKeys],
+ rec->size[recInfo_sortKeys],
+ 0);
+
+ extract_flush_sort_keys(zh, *sysno, 0, sortKeys);
+#if !FLUSH2
+ extract_flush_record_keys(zh, *sysno, 0, delkeys,
+ recordAttr->staticrank);
+#endif
+ if (action == action_delete || action == action_a_delete)
+ {
+ /* record going to be deleted */
+#if FLUSH2
+ extract_flush_record_keys2(zh, *sysno, 0, recordAttr->staticrank,
+ delkeys, recordAttr->staticrank);
+#endif
+ if (zebra_rec_keys_empty(delkeys))
+ {
+ yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
+ pr_fname, (zint) start_offset);
+ yaz_log(YLOG_WARN, "cannot delete file above, "
+ "storeKeys false (3)");
+ }
+ else
+ {
+ if (show_progress)
+ yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
+ pr_fname, (zint) start_offset);
+ zh->records_deleted++;
+ if (matchStr)
+ {
+ int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+ dict_delete_ord(zh->reg->matchDict, db_ord, matchStr);
+ }
+ rec_del(zh->reg->records, &rec);
+ }
+ zebra_rec_keys_close(delkeys);
+ zebra_rec_keys_close(sortKeys);
+ rec_free(&rec);
+ logRecord(zh);
+ return ZEBRA_OK;
+ }
+ else
+ { /* update or special_update */
+ if (show_progress)
+ yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
+ pr_fname, (zint) start_offset);
+ extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys);
+
+#if FLUSH2
+ extract_flush_record_keys2(zh, *sysno,
+ zh->reg->keys, extractCtrl.staticrank,
+ delkeys, recordAttr->staticrank);
+#else
+ extract_flush_record_keys(zh, *sysno, 1,
+ zh->reg->keys, extractCtrl.staticrank);
+#endif
+ recordAttr->staticrank = extractCtrl.staticrank;
+ zh->records_updated++;
+ }
+ zebra_rec_keys_close(delkeys);
+ zebra_rec_keys_close(sortKeys);
+ }
+ /* update file type */
+ xfree(rec->info[recInfo_fileType]);
+ rec->info[recInfo_fileType] =
+ rec_strdup(recordType, &rec->size[recInfo_fileType]);
+
+ /* update filename */
+ xfree(rec->info[recInfo_filename]);
+ rec->info[recInfo_filename] =
+ rec_strdup(fname, &rec->size[recInfo_filename]);
+
+ /* update delete keys */
+ xfree(rec->info[recInfo_delKeys]);
+ if (!zebra_rec_keys_empty(zh->reg->keys) && zh->m_store_keys == 1)
+ {
+ zebra_rec_keys_get_buf(zh->reg->keys,
+ &rec->info[recInfo_delKeys],
+ &rec->size[recInfo_delKeys]);
+ }
+ else
+ {
+ rec->info[recInfo_delKeys] = NULL;
+ rec->size[recInfo_delKeys] = 0;
+ }
+ /* update sort keys */
+ xfree(rec->info[recInfo_sortKeys]);
+
+ zebra_rec_keys_get_buf(zh->reg->sortKeys,
+ &rec->info[recInfo_sortKeys],
+ &rec->size[recInfo_sortKeys]);
+
+ if (stream)
+ {
+ recordAttr->recordSize = end_offset - start_offset;
+ zebraExplain_recordBytesIncrement(zh->reg->zei,
+ recordAttr->recordSize);
+ }
+
+ /* set run-number for this record */
+ recordAttr->runNumber =
+ zebraExplain_runNumberIncrement(zh->reg->zei, 0);
+
+ /* release previous store data */
+ xfree(rec->info[recInfo_storeData]);
+
+ /* update store data */
+ if (zh->store_data_buf)
+ {
+ rec->size[recInfo_storeData] = zh->store_data_size;
+ rec->info[recInfo_storeData] = zh->store_data_buf;
+ zh->store_data_buf = 0;
+ recordAttr->recordSize = zh->store_data_size;
+ }
+ else if (zh->m_store_data)
+ {
+ off_t cur_offset = stream->tellf(stream);
+
+ rec->size[recInfo_storeData] = recordAttr->recordSize;
+ rec->info[recInfo_storeData] = (char *)
+ xmalloc(recordAttr->recordSize);
+ stream->seekf(stream, start_offset);
+ stream->readf(stream, rec->info[recInfo_storeData],
+ recordAttr->recordSize);
+ stream->seekf(stream, cur_offset);
+ }
+ else
+ {
+ rec->info[recInfo_storeData] = NULL;
+ rec->size[recInfo_storeData] = 0;
+ }
+ /* update database name */
+ xfree(rec->info[recInfo_databaseName]);
+ rec->info[recInfo_databaseName] =
+ rec_strdup(zh->basenames[0], &rec->size[recInfo_databaseName]);
+
+ /* update offset */
+ recordAttr->recordOffset = start_offset;
+
+ /* commit this record */
+ rec_put(zh->reg->records, &rec);
+ logRecord(zh);
+ return ZEBRA_OK;
+}
+
+/*
+ * Re-extracts the keys of an already loaded record from a data1 tree and
+ * queues the resulting index changes.
+ *
+ * Steps, in order:
+ *  - select the record's database (the process aborts if that fails);
+ *  - reset zh->reg->keys and zh->reg->sortKeys and, when n is non-NULL,
+ *    run grs_extract_tree() on it (presumably filling those key sets
+ *    through the extractCtrl callbacks);
+ *  - flush the keys previously saved in rec (recInfo_delKeys and
+ *    recInfo_sortKeys) with mode 0 and the fresh keys with mode 1;
+ *  - replace rec's saved delete/sort key buffers with the fresh ones.
+ *
+ * handle  a ZebraHandle passed as void *
+ * rec     record whose key buffers are read and replaced in place; this
+ *         function does not itself write the record back
+ * n       root of the data1 tree to extract from; may be NULL
+ *
+ * Returns ZEBRA_OK on every path that returns.
+ */
+ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n)
+{
+    ZebraHandle zh = (ZebraHandle) handle;
+    struct recExtractCtrl extractCtrl;
+
+    /* An unselectable database is fatal.  A fallback that called
+       zebraExplain_newDatabase() used to follow an unconditional abort()
+       and therefore could never execute; it has been dropped, which
+       leaves the run-time behaviour exactly as it was. */
+    if (zebraExplain_curDatabase(zh->reg->zei,
+                                 rec->info[recInfo_databaseName]))
+        abort();
+
+    zebra_rec_keys_reset(zh->reg->keys);
+    zebra_rec_keys_reset(zh->reg->sortKeys);
+
+    extractCtrl.init = extract_init;
+    extractCtrl.tokenAdd = extract_token_add;
+    extractCtrl.schemaAdd = extract_schema_add;
+    extractCtrl.dh = zh->reg->dh;
+
+    init_extractCtrl(zh, &extractCtrl);
+
+    extractCtrl.flagShowRecords = 0;
+    extractCtrl.match_criteria[0] = '\0';
+    extractCtrl.staticrank = 0;
+    extractCtrl.action = action_update;
+
+    extractCtrl.handle = handle;
+    extractCtrl.first_record = 1;
+
+    extract_set_store_data_prepare(&extractCtrl);
+
+    if (n)
+        grs_extract_tree(&extractCtrl, n);
+
+    if (rec->size[recInfo_delKeys])
+    {
+        /* The record carries saved keys: retire them while adding the
+           new ones. */
+        zebra_rec_keys_t delkeys = zebra_rec_keys_open();
+
+        zebra_rec_keys_t sortkeys = zebra_rec_keys_open();
+
+        zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys],
+                               rec->size[recInfo_delKeys],
+                               0);
+#if FLUSH2
+        extract_flush_record_keys2(zh, rec->sysno,
+                                   zh->reg->keys, 0, delkeys, 0);
+#else
+        extract_flush_record_keys(zh, rec->sysno, 0, delkeys, 0);
+        extract_flush_record_keys(zh, rec->sysno, 1, zh->reg->keys, 0);
+#endif
+        zebra_rec_keys_close(delkeys);
+
+        zebra_rec_keys_set_buf(sortkeys, rec->info[recInfo_sortKeys],
+                               rec->size[recInfo_sortKeys],
+                               0);
+
+        extract_flush_sort_keys(zh, rec->sysno, 0, sortkeys);
+        zebra_rec_keys_close(sortkeys);
+    }
+    else
+    {
+        /* No saved keys: only the fresh keys are flushed. */
+#if FLUSH2
+        extract_flush_record_keys2(zh, rec->sysno, zh->reg->keys, 0, 0, 0);
+#else
+        extract_flush_record_keys(zh, rec->sysno, 1, zh->reg->keys, 0);
+#endif
+    }
+    extract_flush_sort_keys(zh, rec->sysno, 1, zh->reg->sortKeys);
+
+    /* Remember the fresh keys in the record, replacing the old buffers. */
+    xfree(rec->info[recInfo_delKeys]);
+    zebra_rec_keys_get_buf(zh->reg->keys,
+                           &rec->info[recInfo_delKeys],
+                           &rec->size[recInfo_delKeys]);
+
+    xfree(rec->info[recInfo_sortKeys]);
+    zebra_rec_keys_get_buf(zh->reg->sortKeys,
+                           &rec->info[recInfo_sortKeys],
+                           &rec->size[recInfo_sortKeys]);
+    return ZEBRA_OK;
+}
+
+/*
+ * Writes one yaz_log() line, at the given level, for every key in reckeys.
+ *
+ * Each line starts with the key components (ZINT_FORMAT, space separated),
+ * followed by the index type and string index reported by
+ * zebraExplain_lookup_ord() for the ordinal in key.mem[0].  The term is
+ * then shown either
+ *  - quoted, as converted by zebra_term_untrans_iconv(), or
+ *  - when its first byte is below CHR_BASE_CHAR, as a symbolic name
+ *    ("alwaysmatches", "firstinfield", "unknown", "space" or "?")
+ *    followed by the decimal value of each term byte.
+ *
+ * Both scratch buffers are fixed size; output that would not fit is
+ * truncated instead of overrunning the buffer.
+ *
+ * zh         handle providing the explain info used for the lookups
+ * is_insert  not used by this function
+ * reckeys    key set to dump; rewound first, nothing is logged when
+ *            zebra_rec_keys_rewind() returns 0
+ * level      yaz log level for the emitted lines
+ */
+void extract_rec_keys_log(ZebraHandle zh, int is_insert,
+                          zebra_rec_keys_t reckeys,
+                          int level)
+{
+    if (zebra_rec_keys_rewind(reckeys))
+    {
+        size_t slen;
+        const char *str;
+        struct it_key key;
+        NMEM nmem = nmem_create();
+
+        while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
+        {
+            char keystr[200]; /* room for zints to print */
+            size_t keystr_len = 0;
+            char *dst_term = 0;
+            int ord = CAST_ZINT_TO_INT(key.mem[0]);
+            const char *index_type;
+            int i;
+            const char *string_index;
+
+            zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
+                                    0/* db */, &string_index);
+            assert(index_type);
+            zebra_term_untrans_iconv(zh, nmem, index_type,
+                                     &dst_term, str);
+            *keystr = '\0';
+            /* Stop before the buffer could overflow.  32 bytes per
+               component is assumed to be ample (a 64-bit value prints in
+               at most 20 characters plus separator) - TODO confirm the
+               width of zint. */
+            for (i = 0; i < key.len && keystr_len + 32 <= sizeof(keystr);
+                 i++)
+            {
+                keystr_len += (size_t) sprintf(keystr + keystr_len,
+                                               ZINT_FORMAT " ", key.mem[i]);
+            }
+
+            if (*str < CHR_BASE_CHAR)
+            {
+                size_t j;
+                size_t dst_len;
+                char dst_buf[200]; /* room for special chars */
+
+                strcpy(dst_buf , "?");
+
+                if (!strcmp(str, ""))
+                    strcpy(dst_buf, "alwaysmatches");
+                if (!strcmp(str, FIRST_IN_FIELD_STR))
+                    strcpy(dst_buf, "firstinfield");
+                else if (!strcmp(str, CHR_UNKNOWN))
+                    strcpy(dst_buf, "unknown");
+                else if (!strcmp(str, CHR_SPACE))
+                    strcpy(dst_buf, "space");
+
+                /* Every byte prints as at most " 255" (4 characters);
+                   require room for that plus the terminating NUL so a
+                   long term is cut short rather than smashing the
+                   stack. */
+                dst_len = strlen(dst_buf);
+                for (j = 0; j < slen && dst_len + 5 <= sizeof(dst_buf); j++)
+                {
+                    dst_len += (size_t) sprintf(dst_buf + dst_len, " %d",
+                                                str[j] & 0xff);
+                }
+                yaz_log(level, "%s%s %s %s", keystr, index_type,
+                        string_index, dst_buf);
+
+            }
+            else
+                yaz_log(level, "%s%s %s \"%s\"", keystr, index_type,
+                        string_index, dst_term);
+
+            /* dst_term was allocated from nmem; recycle it per key. */
+            nmem_reset(nmem);
+        }
+        nmem_destroy(nmem);
+    }
+}
+
+/*
+ * Tallies how many keys in reckeys belong to each ordinal (key.mem[0]) and
+ * reports every tally to zebraExplain_ord_adjust_occurrences().
+ *
+ * zh         handle whose explain info (zh->reg->zei) receives the update
+ * is_insert  non-zero: report (+count, +1) per ordinal;
+ *            zero:     report (-count, -1) per ordinal
+ * reckeys    key set to scan; nothing happens when
+ *            zebra_rec_keys_rewind() returns 0
+ */
+void extract_rec_keys_adjust(ZebraHandle zh, int is_insert,
+                             zebra_rec_keys_t reckeys)
+{
+    /* One node per distinct ordinal seen in reckeys. */
+    struct ord_stat {
+        int no;
+        int ord;
+        struct ord_stat *next;
+    };
+    ZebraExplainInfo zei = zh->reg->zei;
+    const int sign = is_insert ? 1 : -1;
+    struct ord_stat *head = 0;
+    struct ord_stat *node;
+    size_t term_len;
+    const char *term;
+    struct it_key key;
+
+    if (!zebra_rec_keys_rewind(reckeys))
+        return;
+
+    /* Pass 1: count keys per ordinal; new ordinals go to the list front. */
+    while (zebra_rec_keys_read(reckeys, &term, &term_len, &key))
+    {
+        const int ord = CAST_ZINT_TO_INT(key.mem[0]);
+
+        node = head;
+        while (node && node->ord != ord)
+            node = node->next;
+
+        if (node)
+            node->no++;
+        else
+        {
+            node = xmalloc(sizeof(*node));
+            node->no = 1;
+            node->ord = ord;
+            node->next = head;
+            head = node;
+        }
+    }
+
+    /* Pass 2: report each tally and release its node. */
+    while (head)
+    {
+        struct ord_stat *rest = head->next;
+
+        zebraExplain_ord_adjust_occurrences(zei, head->ord,
+                                            sign * head->no, sign);
+        xfree(head);
+        head = rest;
+    }
+}
+
+/*
+ * Queues the index changes for one record by writing its deleted and
+ * inserted keys to zh->reg->key_block.
+ *
+ * The key block is created on first use from the resources "memmax"
+ * (megabytes, default "8"), "keyTmpDir" (default ".") and "threads"
+ * (default "1").  Occurrence statistics are adjusted for each key set
+ * that is given, and the record count changes by +1 for a pure insert
+ * (no del_keys) or -1 for a pure delete (no ins_keys).
+ *
+ * Both key sets are then read in lockstep, one key from each per round.
+ * When the two keys of a round are identical (same rank, same it_key,
+ * same term bytes) the pair is skipped; otherwise whichever keys were
+ * read are written, the delete first (flag 0) and the insert second
+ * (flag 1).  Only keys at the same position are ever compared.
+ *
+ * zh        handle owning the key block and explain info
+ * sysno     record number passed on to key_block_write()
+ * ins_keys  keys to insert, or 0 for none
+ * ins_rank  rank written with inserted keys
+ * del_keys  keys to delete, or 0 for none
+ * del_rank  rank written with deleted keys
+ */
+void extract_flush_record_keys2(ZebraHandle zh, zint sysno,
+                                zebra_rec_keys_t ins_keys, zint ins_rank,
+                                zebra_rec_keys_t del_keys, zint del_rank)
+{
+    ZebraExplainInfo zei = zh->reg->zei;
+    int rounds_written = 0;
+    int rounds_skipped = 0;
+
+    if (!zh->reg->key_block)
+    {
+        int mem = 1024*1024 * atoi( res_get_def( zh->res, "memmax", "8"));
+        const char *tmp_dir = res_get_def(zh->res, "keyTmpDir", ".");
+        int threads = atoi(res_get_def(zh->res, "threads", "1"));
+
+        zh->reg->key_block = key_block_create(mem, tmp_dir, threads);
+    }
+
+    if (ins_keys)
+    {
+        extract_rec_keys_adjust(zh, 1, ins_keys);
+        if (!del_keys)
+            zebraExplain_recordCountIncrement(zei, 1);
+        zebra_rec_keys_rewind(ins_keys);
+    }
+    if (del_keys)
+    {
+        extract_rec_keys_adjust(zh, 0, del_keys);
+        if (!ins_keys)
+            zebraExplain_recordCountIncrement(zei, -1);
+        zebra_rec_keys_rewind(del_keys);
+    }
+
+    for (;;)
+    {
+        size_t del_slen;
+        const char *del_str;
+        struct it_key del_key_in;
+        int got_del = 0;
+
+        size_t ins_slen;
+        const char *ins_str;
+        struct it_key ins_key_in;
+        int got_ins = 0;
+
+        if (del_keys)
+            got_del = zebra_rec_keys_read(del_keys, &del_str, &del_slen,
+                                          &del_key_in);
+        if (ins_keys)
+            got_ins = zebra_rec_keys_read(ins_keys, &ins_str, &ins_slen,
+                                          &ins_key_in);
+
+        /* Both sets exhausted. */
+        if (!got_del && !got_ins)
+            break;
+
+        if (got_del && got_ins && ins_rank == del_rank
+            && !key_compare(&del_key_in, &ins_key_in)
+            && ins_slen == del_slen && !memcmp(del_str, ins_str, del_slen))
+        {
+            /* Delete and insert cancel each other out. */
+            rounds_skipped++;
+        }
+        else
+        {
+            rounds_written++;
+            if (got_del)
+                key_block_write(zh->reg->key_block, sysno,
+                                &del_key_in, 0, del_str, del_slen,
+                                del_rank, zh->m_staticrank);
+            if (got_ins)
+                key_block_write(zh->reg->key_block, sysno,
+                                &ins_key_in, 1, ins_str, ins_slen,
+                                ins_rank, zh->m_staticrank);
+        }
+    }
+    yaz_log(log_level_extract, "normal=%d optimized=%d",
+            rounds_written, rounds_skipped);
+}
+
+
+ZEBRA_RES zebra_rec_keys_to_snippets1(ZebraHandle zh,
+ zebra_rec_keys_t reckeys,
+ zebra_snippets *snippets)
+{
+ NMEM nmem = nmem_create();
+ if (zebra_rec_keys_rewind(reckeys))
+ {
+ const char *str;
+ size_t slen;
+ struct it_key key;
+ while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
+ {
+ char *dst_term = 0;
+ int ord;
+ zint seqno;
+ const char *index_type;