+
+ if (! *sysno)
+ {
+ /* new record */
+ if (delete_flag)
+ {
+ yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
+ pr_fname, (long) recordOffset);
+ yaz_log (YLOG_WARN, "cannot delete record above (seems new)");
+ return ZEBRA_FAIL;
+ }
+ if (show_progress)
+ yaz_log (YLOG_LOG, "add %s %s %ld", recordType, pr_fname,
+ (long) recordOffset);
+ rec = rec_new (zh->reg->records);
+
+ *sysno = rec->sysno;
+
+ recordAttr = rec_init_attr (zh->reg->zei, rec);
+ recordAttr->staticrank = extractCtrl.staticrank;
+
+ if (matchStr)
+ {
+ int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+ dict_insert_ord(zh->reg->matchDict, db_ord, matchStr,
+ sizeof(*sysno), sysno);
+ }
+#if NATTR
+ extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
+ extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
+
+#if 0
+ print_rec_keys(zh, zh->reg->keys);
+#endif
+ extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
+ recordAttr->staticrank);
+ zh->records_inserted++;
+ }
+ else
+ {
+ /* record already exists */
+ zebra_rec_keys_t delkeys = zebra_rec_keys_open();
+#if NATTR
+ zebra_rec_keys_t sortKeys = zebra_rec_keys_open();
+#else
+ struct sortKeys sortKeys;
+#endif
+
+ if (!allow_update)
+ {
+ yaz_log (YLOG_LOG, "skipped %s %s %ld",
+ recordType, pr_fname, (long) recordOffset);
+ logRecord(zh);
+ return ZEBRA_FAIL;
+ }
+
+ rec = rec_get (zh->reg->records, *sysno);
+ assert (rec);
+
+ recordAttr = rec_init_attr (zh->reg->zei, rec);
+
+ zebra_rec_keys_set_buf(delkeys,
+ rec->info[recInfo_delKeys],
+ rec->size[recInfo_delKeys],
+ 0);
+#if NATTR
+ zebra_rec_keys_set_buf(sortKeys,
+ rec->info[recInfo_sortKeys],
+ rec->size[recInfo_sortKeys],
+ 0);
+#else
+ sortKeys.buf_used = rec->size[recInfo_sortKeys];
+ sortKeys.buf = rec->info[recInfo_sortKeys];
+#endif
+
+#if NATTR
+ extract_flushSortKeys (zh, *sysno, 0, sortKeys);
+#else
+ extract_flushSortKeys (zh, *sysno, 0, &sortKeys);
+#endif
+ extract_flushRecordKeys (zh, *sysno, 0, delkeys,
+ recordAttr->staticrank);
+ if (delete_flag)
+ {
+ /* record going to be deleted */
+ if (zebra_rec_keys_empty(delkeys))
+ {
+ yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
+ pr_fname, (long) recordOffset);
+ yaz_log (YLOG_WARN, "cannot delete file above, "
+ "storeKeys false (3)");
+ }
+ else
+ {
+ if (show_progress)
+ yaz_log (YLOG_LOG, "delete %s %s %ld", recordType,
+ pr_fname, (long) recordOffset);
+ zh->records_deleted++;
+ if (matchStr)
+ {
+ int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
+ dict_delete_ord(zh->reg->matchDict, db_ord, matchStr);
+ }
+ rec_del (zh->reg->records, &rec);
+ }
+ rec_rm (&rec);
+ logRecord(zh);
+ return ZEBRA_OK;
+ }
+ else
+ {
+ if (show_progress)
+ yaz_log (YLOG_LOG, "update %s %s %ld", recordType,
+ pr_fname, (long) recordOffset);
+ recordAttr->staticrank = extractCtrl.staticrank;
+#if NATTR
+ extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
+#else
+ extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+#endif
+ extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys,
+ recordAttr->staticrank);
+ zh->records_updated++;
+ }
+ zebra_rec_keys_close(delkeys);
+#if NATTR
+ zebra_rec_keys_close(sortKeys);
+#endif
+ }
+ /* update file type */
+ xfree (rec->info[recInfo_fileType]);
+ rec->info[recInfo_fileType] =
+ rec_strdup (recordType, &rec->size[recInfo_fileType]);
+
+ /* update filename */
+ xfree (rec->info[recInfo_filename]);
+ rec->info[recInfo_filename] =
+ rec_strdup (fname, &rec->size[recInfo_filename]);
+
+ /* update delete keys */
+ xfree (rec->info[recInfo_delKeys]);
+ if (!zebra_rec_keys_empty(zh->reg->keys) && zh->m_store_keys == 1)
+ {
+ zebra_rec_keys_get_buf(zh->reg->keys,
+ &rec->info[recInfo_delKeys],
+ &rec->size[recInfo_delKeys]);
+ }
+ else
+ {
+ rec->info[recInfo_delKeys] = NULL;
+ rec->size[recInfo_delKeys] = 0;
+ }
+ /* update sort keys */
+ xfree (rec->info[recInfo_sortKeys]);
+
+#if NATTR
+ zebra_rec_keys_get_buf(zh->reg->sortKeys,
+ &rec->info[recInfo_sortKeys],
+ &rec->size[recInfo_sortKeys]);
+#else
+ rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
+ rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
+ zh->reg->sortKeys.buf = NULL;
+ zh->reg->sortKeys.buf_max = 0;
+#endif
+
+ /* save file size of original record */
+ zebraExplain_recordBytesIncrement (zh->reg->zei,
+ - recordAttr->recordSize);
+#if 0
+ recordAttr->recordSize = fi->file_moffset - recordOffset;
+ if (!recordAttr->recordSize)
+ recordAttr->recordSize = fi->file_max - recordOffset;
+#else
+ recordAttr->recordSize = buf_size;
+#endif
+ zebraExplain_recordBytesIncrement (zh->reg->zei,
+ recordAttr->recordSize);
+
+ /* set run-number for this record */
+ recordAttr->runNumber =
+ zebraExplain_runNumberIncrement (zh->reg->zei, 0);
+
+ /* update store data */
+ xfree (rec->info[recInfo_storeData]);
+
+ /* update store data */
+ if (zh->store_data_buf)
+ {
+ rec->size[recInfo_storeData] = zh->store_data_size;
+ rec->info[recInfo_storeData] = zh->store_data_buf;
+ zh->store_data_buf = 0;
+ }
+ else if (zh->m_store_data)
+ {
+ rec->size[recInfo_storeData] = recordAttr->recordSize;
+ rec->info[recInfo_storeData] = (char *)
+ xmalloc (recordAttr->recordSize);
+ memcpy (rec->info[recInfo_storeData], buf, recordAttr->recordSize);
+ }
+ else
+ {
+ rec->info[recInfo_storeData] = NULL;
+ rec->size[recInfo_storeData] = 0;
+ }
+ /* update database name */
+ xfree (rec->info[recInfo_databaseName]);
+ rec->info[recInfo_databaseName] =
+ rec_strdup (zh->basenames[0], &rec->size[recInfo_databaseName]);
+
+ /* update offset */
+ recordAttr->recordOffset = recordOffset;
+
+ /* commit this record */
+ rec_put (zh->reg->records, &rec);
+ logRecord(zh);
+ return ZEBRA_OK;
+}
+/*
+ * explain_extract: rebuild the index keys of record `rec` from the data1
+ * tree `n`.
+ *
+ * handle - passed in as void * and used as a ZebraHandle.
+ * rec    - record whose stored delete keys and sort keys are replaced.
+ * n      - root of the tree handed to grs_extract_tree; when NULL,
+ *          grs_extract_tree is not called.
+ *
+ * If the record already carries delete keys, those (and its stored sort
+ * keys) are flushed with cmd 0 before the freshly extracted keys are
+ * flushed with cmd 1.  The record's delKeys/sortKeys slots are then
+ * replaced with the new buffers.
+ *
+ * Always returns 0.  Aborts the process when zebraExplain_curDatabase
+ * returns non-zero for the record's database name.
+ */
+int explain_extract (void *handle, Record rec, data1_node *n)
+{
+ ZebraHandle zh = (ZebraHandle) handle;
+ struct recExtractCtrl extractCtrl;
+ /* A non-zero return from zebraExplain_curDatabase is treated as fatal. */
+ if (zebraExplain_curDatabase (zh->reg->zei,
+ rec->info[recInfo_databaseName]))
+ {
+ abort(); /* NOTE(review): unconditional, so the newDatabase fallback below can never run - confirm which behaviour is intended */
+ if (zebraExplain_newDatabase (zh->reg->zei,
+ rec->info[recInfo_databaseName], 0))
+ abort ();
+ }
+ /* Reset the register's key set and sort-key set before extracting. */
+ zebra_rec_keys_reset(zh->reg->keys);
+
+#if NATTR
+ zebra_rec_keys_reset(zh->reg->sortKeys);
+#else
+ zh->reg->sortKeys.buf_used = 0;
+#endif
+ extractCtrl.init = extract_init;
+ extractCtrl.tokenAdd = extract_token_add;
+ extractCtrl.schemaAdd = extract_schema_add;
+ extractCtrl.dh = zh->reg->dh;
+
+ init_extractCtrl(zh, &extractCtrl);
+ /* Assigned after init_extractCtrl, so these values take precedence over anything it set. */
+ extractCtrl.flagShowRecords = 0;
+ extractCtrl.match_criteria[0] = '\0';
+ extractCtrl.staticrank = 0;
+ extractCtrl.handle = handle;
+ extractCtrl.first_record = 1;
+
+ extract_set_store_data_prepare(&extractCtrl);
+ /* Extraction is skipped for a NULL tree. */
+ if (n)
+ grs_extract_tree(&extractCtrl, n);
+ /* Flush the keys stored in the record by an earlier run with cmd 0, but only if any were stored. */
+ if (rec->size[recInfo_delKeys])
+ {
+ zebra_rec_keys_t delkeys = zebra_rec_keys_open();
+
+#if NATTR
+ zebra_rec_keys_t sortkeys = zebra_rec_keys_open();
+#else
+ struct sortKeys sortkeys;
+#endif
+ /* NOTE(review): the trailing 0 presumably means delkeys borrows rec's buffer instead of owning it - confirm against zebra_rec_keys_set_buf. */
+ zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys],
+ rec->size[recInfo_delKeys],
+ 0);
+ extract_flushRecordKeys (zh, rec->sysno, 0, delkeys, 0);
+ zebra_rec_keys_close(delkeys);
+#if NATTR
+ zebra_rec_keys_set_buf(sortkeys, rec->info[recInfo_sortKeys],
+ rec->size[recInfo_sortKeys],
+ 0);
+
+ extract_flushSortKeys (zh, rec->sysno, 0, sortkeys);
+ zebra_rec_keys_close(sortkeys);
+#else
+ sortkeys.buf_used = rec->size[recInfo_sortKeys];
+ sortkeys.buf = rec->info[recInfo_sortKeys];
+ extract_flushSortKeys (zh, rec->sysno, 0, &sortkeys);
+#endif
+ }
+ extract_flushRecordKeys (zh, rec->sysno, 1, zh->reg->keys, 0); /* cmd 1: flush the newly extracted keys */
+#if NATTR
+ extract_flushSortKeys (zh, rec->sysno, 1, zh->reg->sortKeys);
+#else
+ extract_flushSortKeys (zh, rec->sysno, 1, &zh->reg->sortKeys);
+#endif
+ /* Replace the record's stored delete keys with the newly extracted set. */
+ xfree (rec->info[recInfo_delKeys]);
+ zebra_rec_keys_get_buf(zh->reg->keys,
+ &rec->info[recInfo_delKeys],
+ &rec->size[recInfo_delKeys]);
+ /* Likewise replace the record's stored sort keys. */
+ xfree (rec->info[recInfo_sortKeys]);
+#if NATTR
+ zebra_rec_keys_get_buf(zh->reg->sortKeys,
+ &rec->info[recInfo_sortKeys],
+ &rec->size[recInfo_sortKeys]);
+#else
+ rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
+ rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
+ zh->reg->sortKeys.buf = NULL; /* the buffer pointer now lives in rec; the register forgets it */
+ zh->reg->sortKeys.buf_max = 0;
+#endif
+
+ return 0;
+}
+
+/*
+ * Append every key in `reckeys` to the in-memory key buffer of `zh`.
+ *
+ * zh         - Zebra handle; zh->reg->key_buf is allocated on first use,
+ *              sized by the "memmax" resource (in megabytes, default 8).
+ * sysno      - record id stored in a key when the key itself carries no
+ *              record id (key mem[1] == 0).
+ * cmd        - delete/insert indicator written after each term; also
+ *              adjusts the explain record count by +1 (non-zero) or -1.
+ * reckeys    - the keys to flush.
+ * staticrank - written as the first key component when zh->m_staticrank
+ *              is set; a negative value is logged and replaced by 0.
+ *
+ * Buffer layout: encoded key data grows upward from the start of key_buf
+ * while the table of pointers to each entry grows downward from
+ * key_buf[ptr_top - 1].  When fewer than 1024 bytes separate the two,
+ * extract_flushWriteKeys is called to drain the buffer first.
+ */
+void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
+                              int cmd,
+                              zebra_rec_keys_t reckeys,
+                              zint staticrank)
+{
+    ZebraExplainInfo zei = zh->reg->zei;
+
+    if (!zh->reg->key_buf)
+    {
+        /* FIXME: That "8" should be in a default settings include */
+        /* not hard-coded here! -H */
+        int mem_mb = atoi( res_get_def( zh->res, "memmax", "8"));
+        int mem;
+
+        /* Validate the megabyte count BEFORE scaling it to bytes:
+           1024*1024*2048 no longer fits a 32-bit int, and signed
+           overflow is undefined (some values would even wrap to a
+           small positive size and slip past a post-multiply check). */
+        if (mem_mb <= 0 || mem_mb > 2047)
+        {
+            yaz_log(YLOG_WARN, "Invalid memory setting, using default 8 MB");
+            mem_mb = 8;
+        }
+        mem = 1024*1024 * mem_mb;
+
+        zh->reg->key_buf = (char**) xmalloc (mem);
+        zh->reg->ptr_top = mem/sizeof(char*);
+        zh->reg->ptr_i = 0;
+        zh->reg->key_buf_used = 0;
+        zh->reg->key_file_no = 0;
+    }
+    zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1);
+
+    if (zebra_rec_keys_rewind(reckeys))
+    {
+        size_t slen;
+        const char *str;
+        struct it_key key_in;
+        while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
+        {
+            int ch = 0;
+            struct it_key key_out;
+            zint *keyp = key_out.mem;
+
+            assert(key_in.len == 4);
+
+            /* check for buffer overflow */
+            /* NOTE(review): assumes one encoded entry (ordinal + term +
+               2 bytes + struct it_key) never exceeds 1024 bytes -
+               confirm against the maximum term length. */
+            if (zh->reg->key_buf_used + 1024 >
+                (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*))
+                extract_flushWriteKeys (zh, 0);
+
+            /* claim the next pointer slot (counted down from ptr_top)
+               and point it at the start of this entry's data */
+            ++(zh->reg->ptr_i);
+            assert(zh->reg->ptr_i > 0);
+            (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] =
+                (char*)zh->reg->key_buf + zh->reg->key_buf_used;
+
+            /* encode the ordinal value (field/use/attribute) .. */
+            ch = (int) key_in.mem[0];
+            zh->reg->key_buf_used +=
+                key_SU_encode(ch, (char*)zh->reg->key_buf +
+                              zh->reg->key_buf_used);
+
+            /* copy the 0-terminated stuff from str to output */
+            memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, str, slen);
+            zh->reg->key_buf_used += slen;
+            ((char*)zh->reg->key_buf)[(zh->reg->key_buf_used)++] = '\0';
+
+            /* the delete/insert indicator */
+            ((char*)zh->reg->key_buf)[(zh->reg->key_buf_used)++] = cmd;
+
+            if (zh->m_staticrank) /* rank config enabled ? */
+            {
+                if (staticrank < 0)
+                {
+                    yaz_log(YLOG_WARN, "staticrank = %ld. Setting to 0",
+                            (long) staticrank);
+                    staticrank = 0;
+                }
+                *keyp++ = staticrank;
+                key_out.len = 4;
+            }
+            else
+                key_out.len = 3;
+
+            if (key_in.mem[1]) /* filter specified record ID */
+                *keyp++ = key_in.mem[1];
+            else
+                *keyp++ = sysno;
+            *keyp++ = key_in.mem[2]; /* section_id */
+            *keyp++ = key_in.mem[3]; /* sequence .. */
+
+            memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used,
+                   &key_out, sizeof(key_out));
+            (zh->reg->key_buf_used) += sizeof(key_out);
+        }
+    }
+}
+
+/*
+ * Sort the keys collected in zh->reg->key_buf and pass them on, either
+ * directly to zebra_index_merge or into a numbered temporary file.
+ *
+ * zh    - Zebra handle owning the key buffer.
+ * final - non-zero when this is the last flush; only consulted for the
+ *         "auto" tempfiles policy.
+ *
+ * Returns after debug logging only when no keys are buffered.  Calls
+ * exit(1) if the temporary file cannot be opened or closed.  On normal
+ * completion the buffer is marked empty (ptr_i and key_buf_used are
+ * reset to 0).
+ */
+void extract_flushWriteKeys (ZebraHandle zh, int final)
+    /* optimizing: if final=1, and no files written yet */
+    /* push the keys directly to merge, sidestepping the */
+    /* temp file altogether. Speeds small updates */
+{
+    FILE *outf;
+    char out_fname[200]; /* NOTE(review): filled by extract_get_fname_tmp, which receives no size - confirm it cannot exceed 200 bytes */
+    char *prevcp, *cp;
+    struct encode_info encode_info;
+    int ptr_i = zh->reg->ptr_i;
+    int temp_policy;
+#if SORT_EXTRA
+    int i;
+#endif
+    if (!zh->reg->key_buf || ptr_i <= 0)
+    {
+        yaz_log (YLOG_DEBUG, "  nothing to flush section=%d buf=%p i=%d",
+                 zh->reg->key_file_no, zh->reg->key_buf, ptr_i);
+        yaz_log (YLOG_DEBUG, "  buf=%p ",
+                 zh->reg->key_buf);
+        yaz_log (YLOG_DEBUG, "  ptr=%d ",zh->reg->ptr_i);
+        yaz_log (YLOG_DEBUG, "  reg=%p ",zh->reg);
+
+        return;
+    }
+
+    (zh->reg->key_file_no)++;
+    yaz_log (YLOG_LOG, "sorting section %d", (zh->reg->key_file_no));
+    yaz_log (YLOG_DEBUG, "  sort_buff at %p n=%d",
+             zh->reg->key_buf + zh->reg->ptr_top - ptr_i,ptr_i);
+#if !SORT_EXTRA
+    /* the ptr_i live pointers occupy the top end of key_buf */
+    qsort (zh->reg->key_buf + zh->reg->ptr_top - ptr_i, ptr_i,
+           sizeof(char*), key_qsort_compare);
+
+    /* zebra.cfg: tempfiles:
+       Y: always use temp files (old way)
+       A: use temp files, if more than one (auto)
+       = if this is both the last and the first
+       N: never bother with temp files (new) */
+
+    /* toupper() is only defined for EOF and unsigned char values, so the
+       configuration byte is converted before the call */
+    temp_policy=toupper((unsigned char)
+                        res_get_def(zh->res,"tempfiles","auto")[0]);
+    if (temp_policy != 'Y' && temp_policy != 'N' && temp_policy != 'A') {
+        yaz_log (YLOG_WARN, "Illegal tempfiles setting '%c'. using 'Auto' ",
+                 temp_policy);
+        temp_policy='A';
+    }
+
+    if ( ( temp_policy =='N' ) || /* always from memory */
+         ( ( temp_policy =='A' ) && /* automatic */
+           (zh->reg->key_file_no == 1) && /* this is first time */
+           (final) ) ) /* and last (=only) time */
+    { /* go directly from memory */
+        zh->reg->key_file_no =0; /* signal not to read files */
+        zebra_index_merge(zh);
+        zh->reg->ptr_i = 0;
+        zh->reg->key_buf_used = 0;
+        return;
+    }
+
+    /* Not doing directly from memory, write into a temp file */
+    extract_get_fname_tmp (zh, out_fname, zh->reg->key_file_no);
+
+    if (!(outf = fopen (out_fname, "wb")))
+    {
+        yaz_log (YLOG_FATAL|YLOG_ERRNO, "fopen %s", out_fname);
+        exit (1);
+    }
+    yaz_log (YLOG_LOG, "writing section %d", zh->reg->key_file_no);
+    prevcp = cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i];
+
+    encode_key_init (&encode_info);
+    encode_key_write (cp, &encode_info, outf);
+
+    /* Walk the sorted entries.  An entry whose leading string differs
+       from the previous one starts a new group and is written in full;
+       otherwise only the part from its terminating NUL onward is
+       written. */
+    while (--ptr_i > 0)
+    {
+        cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i];
+        if (strcmp (cp, prevcp))
+        {
+            encode_key_flush ( &encode_info, outf);
+            encode_key_init (&encode_info);
+            encode_key_write (cp, &encode_info, outf);
+            prevcp = cp;
+        }
+        else
+            encode_key_write (cp + strlen(cp), &encode_info, outf);
+    }
+    encode_key_flush ( &encode_info, outf);
+#else
+    /* NOTE(review): this branch refers to key_buf, ptr_top, key_file_no
+       and key_y_len without the zh->reg-> prefix and calls
+       extract_get_fname_tmp with two arguments; it looks stale and is
+       kept unchanged. */
+    qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_x_compare);
+    extract_get_fname_tmp (out_fname, key_file_no);
+
+    if (!(outf = fopen (out_fname, "wb")))
+    {
+        yaz_log (YLOG_FATAL|YLOG_ERRNO, "fopen %s", out_fname);
+        exit (1);
+    }
+    yaz_log (YLOG_LOG, "writing section %d", key_file_no);
+    i = ptr_i;
+    prevcp = key_buf[ptr_top-i];
+    while (1)
+        if (!--i || strcmp (prevcp, key_buf[ptr_top-i]))
+        {
+            key_y_len = strlen(prevcp)+1;
+#if 0
+            yaz_log (YLOG_LOG, "key_y_len: %2d %02x %02x %s",
+                     key_y_len, prevcp[0], prevcp[1], 2+prevcp);
+#endif
+            qsort (key_buf + ptr_top-ptr_i, ptr_i - i,
+                   sizeof(char*), key_y_compare);
+            cp = key_buf[ptr_top-ptr_i];
+            --key_y_len;
+            encode_key_init (&encode_info);
+            encode_key_write (cp, &encode_info, outf);
+            while (--ptr_i > i)
+            {
+                cp = key_buf[ptr_top-ptr_i];
+                encode_key_write (cp+key_y_len, &encode_info, outf);
+            }
+            encode_key_flush ( &encode_info, outf);
+            if (!i)
+                break;
+            prevcp = key_buf[ptr_top-ptr_i];
+        }
+#endif
+    /* fclose flushes buffered output, so a failure here means data may
+       not have reached the file */
+    if (fclose (outf))
+    {
+        yaz_log (YLOG_FATAL|YLOG_ERRNO, "fclose %s", out_fname);
+        exit (1);
+    }
+    yaz_log (YLOG_LOG, "finished section %d", zh->reg->key_file_no);
+    zh->reg->ptr_i = 0;
+    zh->reg->key_buf_used = 0;
+}
+
+/*
+ * Append one snippet entry to `snippets` for every key in `reckeys`.
+ *
+ * zh       - Zebra handle; supplies the explain info used to look up
+ *            each key's ordinal and the term conversion.
+ * reckeys  - keys to walk.
+ * snippets - receives (seqno, ord, converted term) for each key.
+ *
+ * A private NMEM is created for the call, reset after each key and
+ * destroyed before returning.  Always returns ZEBRA_OK.
+ */
+ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh,
+                                  zebra_rec_keys_t reckeys,
+                                  zebra_snippets *snippets)
+{
+    NMEM nmem = nmem_create();
+    if (zebra_rec_keys_rewind(reckeys))
+    {
+        const char *str;
+        size_t slen;
+        struct it_key key;
+        while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
+        {
+            char dst_buf[IT_MAX_WORD];
+            char *dst_term = dst_buf;
+            int ord, seqno;
+            /* Start from 0 so the assert below tests a defined value
+               even if the lookup leaves index_type untouched (its
+               return value is not inspected here). */
+            int index_type = 0;
+            assert(key.len <= 4 && key.len > 2);
+            /* the sequence number is the last key component */
+            seqno = (int) key.mem[key.len-1];
+            ord = key.mem[0];
+
+            zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
+                                    0/* db */, 0/* set */, 0/* use */);
+            assert(index_type);
+            zebra_term_untrans_iconv(zh, nmem, index_type,
+                                     &dst_term, str);
+            zebra_snippets_append(snippets, seqno, ord, dst_term);
+            /* dst_term has been consumed; release per-key allocations */
+            nmem_reset(nmem);
+        }
+    }
+    nmem_destroy(nmem);
+    return ZEBRA_OK;
+}
+
+/*
+ * Debug helper: log every key in `reckeys` at YLOG_LOG level as
+ * "ord=<ordinal> seqno=<n> term=<text>", preceded by a
+ * "print_rec_keys" banner line.
+ *
+ * zh      - Zebra handle supplying the explain info and term conversion.
+ * reckeys - keys to dump; nothing beyond the banner is logged when the
+ *           rewind reports no keys.
+ */
+void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys)
+{
+    const char *term;
+    size_t term_len;
+    struct it_key entry;
+
+    yaz_log(YLOG_LOG, "print_rec_keys");
+
+    if (!zebra_rec_keys_rewind(reckeys))
+        return;
+
+    while (zebra_rec_keys_read(reckeys, &term, &term_len, &entry))
+    {
+        char readable[IT_MAX_WORD];
+        const char *database = 0;
+        int type;
+        int sequence;
+
+        assert(entry.len <= 4 && entry.len > 2);
+
+        /* the database name is fetched by the lookup but not logged */
+        zebraExplain_lookup_ord(zh->reg->zei,
+                                entry.mem[0], &type, &database, 0, 0);
+
+        /* last key component holds the sequence number */
+        sequence = (int) entry.mem[entry.len-1];
+
+        zebra_term_untrans(zh, type, readable, term);
+
+        yaz_log(YLOG_LOG, "ord=" ZINT_FORMAT " seqno=%d term=%s",
+                entry.mem[0], sequence, readable);
+    }
+}
+
+void extract_add_index_string (RecWord *p, const char *str, int length)
+{
+ struct it_key key;
+
+ ZebraHandle zh = p->extractCtrl->handle;
+ ZebraExplainInfo zei = zh->reg->zei;
+ int ch;
+
+ if (p->index_name)
+ {
+ ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name);
+ if (ch < 0)
+ ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name);
+ }
+ else