+ /* save file size of original record */
+ zebraExplain_recordBytesIncrement (zh->reg->zei,
+ - recordAttr->recordSize);
+ if (stream)
+ {
+ off_t end_offset = stream->endf(stream, 0);
+
+ if (!end_offset)
+ end_offset = stream->tellf(stream);
+ else
+ stream->seekf(stream, end_offset);
+
+ recordAttr->recordSize = end_offset - start_offset;
+ zebraExplain_recordBytesIncrement(zh->reg->zei,
+ recordAttr->recordSize);
+ }
+
+ /* set run-number for this record */
+ recordAttr->runNumber =
+ zebraExplain_runNumberIncrement (zh->reg->zei, 0);
+
+ /* update store data */
+ xfree (rec->info[recInfo_storeData]);
+
+ /* update store data */
+ if (zh->store_data_buf)
+ {
+ rec->size[recInfo_storeData] = zh->store_data_size;
+ rec->info[recInfo_storeData] = zh->store_data_buf;
+ zh->store_data_buf = 0;
+ }
+ else if (zh->m_store_data)
+ {
+ off_t cur_offset = stream->tellf(stream);
+
+ rec->size[recInfo_storeData] = recordAttr->recordSize;
+ rec->info[recInfo_storeData] = (char *)
+ xmalloc (recordAttr->recordSize);
+ stream->seekf(stream, start_offset);
+ stream->readf(stream, rec->info[recInfo_storeData],
+ recordAttr->recordSize);
+ stream->seekf(stream, cur_offset);
+ }
+ else
+ {
+ rec->info[recInfo_storeData] = NULL;
+ rec->size[recInfo_storeData] = 0;
+ }
+ /* update database name */
+ xfree (rec->info[recInfo_databaseName]);
+ rec->info[recInfo_databaseName] =
+ rec_strdup (zh->basenames[0], &rec->size[recInfo_databaseName]);
+
+ /* update offset */
+ recordAttr->recordOffset = start_offset;
+
+ /* commit this record */
+ rec_put (zh->reg->records, &rec);
+ logRecord(zh);
+ return ZEBRA_OK;
+}
+
+/*
+ * Extract index keys for the record `rec` from the data1 tree `n` and
+ * queue the resulting index changes.
+ *
+ * handle - the ZebraHandle, passed as void *.
+ * rec    - record being (re)indexed; its databaseName entry selects the
+ *          current database and its delKeys/sortKeys entries are replaced.
+ * n      - tree to extract from; when NULL, grs_extract_tree() is skipped.
+ *
+ * Key sets already stored in the record are flushed with cmd 0 (delete),
+ * the key sets in zh->reg->keys / zh->reg->sortKeys with cmd 1 (insert),
+ * and the record's delKeys/sortKeys entries are then refilled from those
+ * two key sets through zebra_rec_keys_get_buf().
+ *
+ * Always returns ZEBRA_OK.  The process is aborted when
+ * zebraExplain_curDatabase() returns non-zero.
+ */
+ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n)
+{
+    ZebraHandle zh = (ZebraHandle) handle;
+    struct recExtractCtrl ctrl;
+
+    if (zebraExplain_curDatabase(zh->reg->zei,
+                                 rec->info[recInfo_databaseName]) != 0)
+    {
+        /* NOTE(review): this abort() is unconditional, so the attempt to
+           create the database below can never run.  Confirm whether the
+           abort() is a leftover debugging aid or the intended policy. */
+        abort();
+        if (zebraExplain_newDatabase(zh->reg->zei,
+                                     rec->info[recInfo_databaseName], 0) != 0)
+        {
+            abort();
+        }
+    }
+
+    /* start from empty key sets for this record */
+    zebra_rec_keys_reset(zh->reg->keys);
+    zebra_rec_keys_reset(zh->reg->sortKeys);
+
+    /* callbacks and data1 handle are set before init_extractCtrl() runs,
+       the remaining fields after it - same order as always */
+    ctrl.init = extract_init;
+    ctrl.tokenAdd = extract_token_add;
+    ctrl.schemaAdd = extract_schema_add;
+    ctrl.dh = zh->reg->dh;
+
+    init_extractCtrl(zh, &ctrl);
+
+    ctrl.flagShowRecords = 0;
+    ctrl.match_criteria[0] = '\0';
+    ctrl.staticrank = 0;
+    ctrl.handle = handle;
+    ctrl.first_record = 1;
+
+    extract_set_store_data_prepare(&ctrl);
+
+    if (n)
+    {
+        grs_extract_tree(&ctrl, n);
+    }
+
+    if (rec->size[recInfo_delKeys] != 0)
+    {
+        /* the record carries keys from an earlier run: flush them with
+           cmd 0 before the new ones go in */
+        zebra_rec_keys_t prev_keys = zebra_rec_keys_open();
+        zebra_rec_keys_t prev_sort_keys = zebra_rec_keys_open();
+
+        zebra_rec_keys_set_buf(prev_keys,
+                               rec->info[recInfo_delKeys],
+                               rec->size[recInfo_delKeys],
+                               0);
+        extract_flushRecordKeys(zh, rec->sysno, 0, prev_keys, 0);
+        zebra_rec_keys_close(prev_keys);
+
+        zebra_rec_keys_set_buf(prev_sort_keys,
+                               rec->info[recInfo_sortKeys],
+                               rec->size[recInfo_sortKeys],
+                               0);
+        extract_flushSortKeys(zh, rec->sysno, 0, prev_sort_keys);
+        zebra_rec_keys_close(prev_sort_keys);
+    }
+
+    /* flush the current key sets with cmd 1 */
+    extract_flushRecordKeys(zh, rec->sysno, 1, zh->reg->keys, 0);
+    extract_flushSortKeys(zh, rec->sysno, 1, zh->reg->sortKeys);
+
+    /* release the old buffers and let the record pick up the new ones */
+    xfree(rec->info[recInfo_delKeys]);
+    zebra_rec_keys_get_buf(zh->reg->keys,
+                           &rec->info[recInfo_delKeys],
+                           &rec->size[recInfo_delKeys]);
+
+    xfree(rec->info[recInfo_sortKeys]);
+    zebra_rec_keys_get_buf(zh->reg->sortKeys,
+                           &rec->info[recInfo_sortKeys],
+                           &rec->size[recInfo_sortKeys]);
+
+    return ZEBRA_OK;
+}
+
+/*
+ * Tally the keys in `reckeys` per ordinal (the first element of each
+ * key's mem array) and report every tally to
+ * zebraExplain_ord_adjust_occurrences().
+ *
+ * zh        - handle supplying the explain info (zh->reg->zei).
+ * is_insert - non-zero: report (+count, +1) for each ordinal;
+ *             zero:     report (-count, -1).
+ * reckeys   - key set to scan; nothing happens when
+ *             zebra_rec_keys_rewind() returns zero.
+ */
+void extract_rec_keys_adjust(ZebraHandle zh, int is_insert,
+                             zebra_rec_keys_t reckeys)
+{
+    /* one node per distinct ordinal seen in reckeys */
+    struct ord_count {
+        int ord;
+        int hits;
+        struct ord_count *next;
+    };
+    ZebraExplainInfo zei = zh->reg->zei;
+    struct ord_count *head = 0;
+    struct ord_count *node;
+    const char *term;
+    size_t term_len;
+    struct it_key key;
+
+    if (!zebra_rec_keys_rewind(reckeys))
+        return;
+
+    while (zebra_rec_keys_read(reckeys, &term, &term_len, &key))
+    {
+        int ord = CAST_ZINT_TO_INT(key.mem[0]);
+
+        /* look for an existing tally of this ordinal */
+        node = head;
+        while (node && node->ord != ord)
+            node = node->next;
+
+        if (node)
+        {
+            node->hits++;
+        }
+        else
+        {
+            /* first sighting: new nodes go to the front of the list */
+            node = xmalloc(sizeof(*node));
+            node->hits = 1;
+            node->ord = ord;
+            node->next = head;
+            head = node;
+        }
+    }
+
+    /* report each tally front to back, releasing nodes as we go */
+    while (head)
+    {
+        struct ord_count *done = head;
+        int signed_hits = is_insert ? done->hits : - done->hits;
+        int sign = is_insert ? 1 : -1;
+
+        zebraExplain_ord_adjust_occurrences(zei, done->ord,
+                                            signed_hits, sign);
+        head = done->next;
+        xfree(done);
+    }
+}
+
+/*
+ * Append the keys of one record to the in-memory key buffer.
+ *
+ * zh         - handle owning the key buffer (zh->reg->key_buf and friends).
+ * sysno      - record number stored in a key when the key itself carries
+ *              no record ID (key_in.mem[1] == 0).
+ * cmd        - delete/insert indicator written with every key; non-zero
+ *              also bumps the record count by +1, zero by -1.
+ * reckeys    - key set to append.
+ * staticrank - value placed first in each key when zh->m_staticrank is
+ *              set; negative values are logged and replaced by 0.
+ *
+ * The buffer is allocated on first use with a size taken from the
+ * "memmax" resource (megabytes, default 8).  It is used from both ends:
+ * entry data grows upwards from the start, while the table of pointers to
+ * the entries grows downwards from the top (key_buf[ptr_top - ptr_i]).
+ * When less than 1024 bytes of headroom remain, extract_flushWriteKeys()
+ * is called before the next entry is stored.
+ *
+ * Each entry is: encoded ordinal, the key string, a NUL byte, the cmd
+ * byte, and the raw struct it_key.
+ */
+void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
+                              int cmd,
+                              zebra_rec_keys_t reckeys,
+                              zint staticrank)
+{
+    ZebraExplainInfo zei = zh->reg->zei;
+
+    extract_rec_keys_adjust(zh, cmd, reckeys);
+
+    if (!zh->reg->key_buf)
+    {
+        /* FIXME: the default belongs in a shared settings include, */
+        /* not hard-coded here! -H */
+        enum {
+            MEMMAX_MB_DEFAULT = 8,
+            /* largest megabyte count whose byte count still fits a
+               32-bit signed int */
+            MEMMAX_MB_LIMIT = 2047
+        };
+        /* strtol instead of atoi: atoi is undefined for out-of-range
+           input, and multiplying an unchecked value by 1024*1024 in int
+           could overflow.  strtol saturates at LONG_MAX, which the range
+           check below rejects.  Trailing text is still tolerated. */
+        long mem_mb = strtol(res_get_def(zh->res, "memmax", "8"), NULL, 10);
+        int mem;
+
+        if (mem_mb <= 0 || mem_mb > MEMMAX_MB_LIMIT)
+        {
+            yaz_log(YLOG_WARN, "Invalid memory setting, using default 8 MB");
+            mem_mb = MEMMAX_MB_DEFAULT;
+        }
+        mem = (int) (mem_mb * 1024L * 1024L);
+
+        zh->reg->key_buf = (char**) xmalloc (mem);
+        zh->reg->ptr_top = mem/sizeof(char*);
+        zh->reg->ptr_i = 0;
+        zh->reg->key_buf_used = 0;
+        zh->reg->key_file_no = 0;
+    }
+    zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1);
+
+    if (zebra_rec_keys_rewind(reckeys))
+    {
+        size_t slen;
+        const char *str;
+        struct it_key key_in;
+        while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
+        {
+            int ch = 0;
+            int i, j = 0;
+            struct it_key key_out;
+
+            assert(key_in.len >= 2);
+            assert(key_in.len <= IT_KEY_LEVEL_MAX);
+
+            /* check for buffer overflow: flush when the gap between the
+               entry data and the pointer table gets too small */
+            if (zh->reg->key_buf_used + 1024 >
+                (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*))
+                extract_flushWriteKeys (zh, 0);
+
+            /* claim the next pointer slot (counted down from the top)
+               and point it at the start of the new entry */
+            ++(zh->reg->ptr_i);
+            assert(zh->reg->ptr_i > 0);
+            (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] =
+                (char*)zh->reg->key_buf + zh->reg->key_buf_used;
+
+            /* key_in.mem[0] ord/ch */
+            /* key_in.mem[1] filter specified record ID */
+
+            /* encode the ordinal value (field/use/attribute) .. */
+            ch = CAST_ZINT_TO_INT(key_in.mem[0]);
+            zh->reg->key_buf_used +=
+                key_SU_encode(ch, (char*)zh->reg->key_buf +
+                              zh->reg->key_buf_used);
+
+            /* copy slen bytes of str to output and 0-terminate them */
+            memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, str, slen);
+            zh->reg->key_buf_used += slen;
+            ((char*)zh->reg->key_buf)[(zh->reg->key_buf_used)++] = '\0';
+
+            /* the delete/insert indicator */
+            ((char*)zh->reg->key_buf)[(zh->reg->key_buf_used)++] = cmd;
+
+            if (zh->m_staticrank) /* rank config enabled ? */
+            {
+                if (staticrank < 0)
+                {
+                    yaz_log(YLOG_WARN, "staticrank = %ld. Setting to 0",
+                            (long) staticrank);
+                    staticrank = 0;
+                }
+                key_out.mem[j++] = staticrank;
+            }
+
+            if (key_in.mem[1]) /* filter specified record ID */
+                key_out.mem[j++] = key_in.mem[1];
+            else
+                key_out.mem[j++] = sysno;
+            /* remaining levels are carried over unchanged; the ordinal
+               in mem[0] was already written in encoded form above */
+            for (i = 2; i < key_in.len; i++)
+                key_out.mem[j++] = key_in.mem[i];
+            key_out.len = j;
+
+            memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used,
+                   &key_out, sizeof(key_out));
+            (zh->reg->key_buf_used) += sizeof(key_out);
+        }
+    }
+}
+
+void extract_flushWriteKeys (ZebraHandle zh, int final)
+ /* optimizing: if final=1, and no files written yet */
+ /* push the keys directly to merge, sidestepping the */
+ /* temp file altogether. Speeds small updates */
+{
+ FILE *outf;
+ char out_fname[200];
+ char *prevcp, *cp;
+ struct encode_info encode_info;
+ int ptr_i = zh->reg->ptr_i;
+ int temp_policy;
+#if SORT_EXTRA
+ int i;
+#endif
+ if (!zh->reg->key_buf || ptr_i <= 0)
+ {
+ yaz_log(log_level, " nothing to flush section=%d buf=%p i=%d",
+ zh->reg->key_file_no, zh->reg->key_buf, ptr_i);
+ return;
+ }
+
+ (zh->reg->key_file_no)++;
+ yaz_log (YLOG_LOG, "sorting section %d", (zh->reg->key_file_no));
+ yaz_log(log_level, " sort_buff at %p n=%d",
+ zh->reg->key_buf + zh->reg->ptr_top - ptr_i,ptr_i);
+#if !SORT_EXTRA
+ qsort (zh->reg->key_buf + zh->reg->ptr_top - ptr_i, ptr_i,
+ sizeof(char*), key_qsort_compare);
+
+ /* zebra.cfg: tempfiles:
+ Y: always use temp files (old way)
+ A: use temp files, if more than one (auto)
+ = if this is both the last and the first
+ N: never bother with temp files (new) */
+
+ temp_policy=toupper(res_get_def(zh->res,"tempfiles","auto")[0]);
+ if (temp_policy != 'Y' && temp_policy != 'N' && temp_policy != 'A') {
+ yaz_log (YLOG_WARN, "Illegal tempfiles setting '%c'. using 'Auto' ",
+ temp_policy);
+ temp_policy='A';
+ }
+
+ if ( ( temp_policy =='N' ) || /* always from memory */
+ ( ( temp_policy =='A' ) && /* automatic */
+ (zh->reg->key_file_no == 1) && /* this is first time */
+ (final) ) ) /* and last (=only) time */
+ { /* go directly from memory */
+ zh->reg->key_file_no =0; /* signal not to read files */
+ zebra_index_merge(zh);
+ zh->reg->ptr_i = 0;
+ zh->reg->key_buf_used = 0;
+ return;
+ }
+
+ /* Not doing directly from memory, write into a temp file */
+ extract_get_fname_tmp (zh, out_fname, zh->reg->key_file_no);
+
+ if (!(outf = fopen (out_fname, "wb")))
+ {
+ yaz_log (YLOG_FATAL|YLOG_ERRNO, "fopen %s", out_fname);
+ exit (1);
+ }
+ yaz_log (YLOG_LOG, "writing section %d", zh->reg->key_file_no);
+ prevcp = cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i];
+
+ encode_key_init (&encode_info);
+ encode_key_write (cp, &encode_info, outf);
+
+ while (--ptr_i > 0)
+ {
+ cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i];
+ if (strcmp (cp, prevcp))