+ file_begin (fi);
+ r = file_extract_record (zh, sysno, fname, deleteFlag, fi, 1);
+ } while (r && !sysno && fi->file_more);
+ file_read_stop (fi);
+ if (fd != -1)
+ close (fd);
+ return r;
+}
+
+/*
+ * buffer_extract_record: extract keys from a record held in memory and
+ * add, update or delete the corresponding record in the register.
+ *
+ * How the record is identified:
+ *   - If *sysno is non-zero, it is used to identify the record.
+ *   - If not, and match_criteria is non-empty, it is looked up in the
+ *     match dictionary to find the sysno.
+ *   - If not, and zh->m_record_id is set, a match string is built from
+ *     the extracted keys (fileMatchStr) and looked up the same way.
+ *   - If *sysno is still zero afterwards the record is treated as new
+ *     and *sysno receives the sysno of the newly created record.
+ *
+ * Parameters:
+ *   zh             Zebra handle; keys and sort keys are collected in
+ *                  zh->reg->keys / zh->reg->sortKeys.
+ *   buf, buf_size  the record data; read via the zebra_record_int_*
+ *                  callbacks and copied into the record when
+ *                  zh->m_store_data is set.
+ *   delete_flag    non-zero: delete the existing record.
+ *   test_mode      non-zero: do not warn when no keys were generated.
+ *   recordType     record type name; when NULL or empty the type in
+ *                  zh->m_record_type is used.
+ *   sysno          in/out record number (see above); must not be NULL.
+ *   match_criteria optional explicit match string.
+ *   fname          file name stored with the record; may be NULL (it is
+ *                  then printed as "<no file>" in log messages).
+ *   force_update   zero: skip an existing record whose run number equals
+ *                  the current run number.
+ *   allow_update   zero: never touch an existing record.
+ *
+ * Returns:
+ *   -1  the record exists and allow_update is zero.
+ *    1  no keys were generated, the match criteria were bad, or a
+ *       delete was requested for a record that does not exist.
+ *    0  in all other cases (including extraction errors, unknown record
+ *       type, and successful add/update/delete).
+ */
+int buffer_extract_record (ZebraHandle zh,
+                           const char *buf, size_t buf_size,
+                           int delete_flag,
+                           int test_mode,
+                           const char *recordType,
+                           int *sysno,
+                           const char *match_criteria,
+                           const char *fname,
+                           int force_update,
+                           int allow_update)
+{
+    RecordAttr *recordAttr;
+    struct recExtractCtrl extractCtrl;
+    int i, r;
+    const char *matchStr = 0;
+    RecType recType = NULL;
+    char subType[1024];
+    void *clientData;
+    Record rec;
+    long recordOffset = 0;
+    struct zebra_fetch_control fc;
+    const char *pr_fname = fname;  /* filename to print .. */
+
+    if (!pr_fname)
+        pr_fname = "<no file>";  /* make it printable if file is omitted */
+
+    /* read the record from the memory buffer rather than from a file */
+    fc.fd = -1;
+    fc.record_int_buf = buf;
+    fc.record_int_len = buf_size;
+    fc.record_int_pos = 0;
+    fc.offset_end = 0;
+    fc.record_offset = 0;
+
+    extractCtrl.offset = 0;
+    extractCtrl.readf = zebra_record_int_read;
+    extractCtrl.seekf = zebra_record_int_seek;
+    extractCtrl.tellf = zebra_record_int_tell;
+    extractCtrl.endf = zebra_record_int_end;
+    extractCtrl.fh = &fc;
+
+    /* start with empty key and sort key buffers */
+    zh->reg->keys.buf_used = 0;
+    zh->reg->keys.prevAttrUse = -1;
+    zh->reg->keys.prevAttrSet = -1;
+    zh->reg->keys.prevSeqNo = 0;
+    zh->reg->sortKeys.buf_used = 0;
+
+    /* select the first database; create it if selecting fails */
+    if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0]))
+    {
+        if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0],
+                                      zh->m_explain_database))
+            return 0;
+    }
+
+    if (recordType && *recordType) {
+        logf (LOG_DEBUG, "Record type explicitly specified: %s", recordType);
+        recType = recType_byName (zh->reg->recTypes, recordType, subType,
+                                  &clientData);
+    } else {
+        if (!(zh->m_record_type)) {
+            logf (LOG_WARN, "No such record type defined");
+            return 0;
+        }
+        logf (LOG_DEBUG, "Get record type from rgroup: %s",zh->m_record_type);
+        recType = recType_byName (zh->reg->recTypes, zh->m_record_type, subType,
+                                  &clientData);
+        recordType = zh->m_record_type;
+    }
+
+    if (!recType) {
+        /* recordType is the name that was actually looked up in both
+           branches above, so report that one */
+        logf (LOG_WARN, "No such record type: %s", recordType);
+        return 0;
+    }
+
+    extractCtrl.subType = subType;
+    extractCtrl.init = extract_init;
+    extractCtrl.tokenAdd = extract_token_add;
+    extractCtrl.schemaAdd = extract_schema_add;
+    extractCtrl.dh = zh->reg->dh;
+    extractCtrl.handle = zh;
+    extractCtrl.zebra_maps = zh->reg->zebra_maps;
+    extractCtrl.flagShowRecords = 0;
+    /* sequence numbers start at 1 for positioned maps, 0 otherwise */
+    for (i = 0; i<256; i++)
+    {
+        if (zebra_maps_is_positioned(zh->reg->zebra_maps, i))
+            extractCtrl.seqno[i] = 1;
+        else
+            extractCtrl.seqno[i] = 0;
+    }
+
+    /* run the record type's extract handler; it fills zh->reg->keys */
+    r = (*recType->extract)(clientData, &extractCtrl);
+
+    if (r == RECCTRL_EXTRACT_EOF)
+        return 0;
+    else if (r == RECCTRL_EXTRACT_ERROR_GENERIC)
+    {
+        /* error occurred during extraction ... */
+        yaz_log (LOG_WARN, "extract error: generic");
+        return 0;
+    }
+    else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER)
+    {
+        /* error occurred during extraction ... */
+        yaz_log (LOG_WARN, "extract error: no such filter");
+        return 0;
+    }
+    if (zh->reg->keys.buf_used == 0)
+    {
+        /* the extraction process returned no information - the record
+           is probably empty - unless flagShowRecords is in use */
+        if (test_mode)
+            return 1;
+        logf (LOG_WARN, "No keys generated for record");
+        logf (LOG_WARN, " The file is probably empty");
+        return 1;
+    }
+    /* match criteria */
+    matchStr = NULL;
+
+    if (! *sysno) {
+        char *rinfo;
+        if (match_criteria && *match_criteria) {
+            matchStr = match_criteria;
+        } else {
+            if (zh->m_record_id && *zh->m_record_id) {
+                matchStr = fileMatchStr (zh, &zh->reg->keys, pr_fname,
+                                         zh->m_record_id);
+                if (!matchStr)
+                {
+                    logf (LOG_WARN, "Bad match criteria (recordID)");
+                    return 1;
+                }
+            }
+        }
+        if (matchStr) {
+            /* the sysno is stored one byte into the dictionary info */
+            rinfo = dict_lookup (zh->reg->matchDict, matchStr);
+            if (rinfo)
+                memcpy (sysno, rinfo+1, sizeof(*sysno));
+        }
+    }
+
+    if (! *sysno)
+    {
+        /* new record */
+        if (delete_flag)
+        {
+            logf (LOG_LOG, "delete %s %s %ld", recordType,
+                  pr_fname, (long) recordOffset);
+            logf (LOG_WARN, "cannot delete record above (seems new)");
+            return 1;
+        }
+        logf (LOG_LOG, "add %s %s %ld", recordType, pr_fname,
+              (long) recordOffset);
+        rec = rec_new (zh->reg->records);
+
+        *sysno = rec->sysno;
+
+        recordAttr = rec_init_attr (zh->reg->zei, rec);
+
+        if (matchStr)
+        {
+            dict_insert (zh->reg->matchDict, matchStr,
+                         sizeof(*sysno), sysno);
+        }
+        extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+        extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys);
+
+        zh->records_inserted++;
+    }
+    else
+    {
+        /* record already exists */
+        struct recKeys delkeys;
+        struct sortKeys sortKeys;
+
+        if (!allow_update) {
+            logf (LOG_LOG, "skipped %s %s %ld",
+                  recordType, pr_fname, (long) recordOffset);
+            logRecord(zh);
+            return -1;
+        }
+
+        rec = rec_get (zh->reg->records, *sysno);
+        assert (rec);
+
+        recordAttr = rec_init_attr (zh->reg->zei, rec);
+
+        if (!force_update) {
+            /* already handled during the current run: leave it alone */
+            if (recordAttr->runNumber ==
+                zebraExplain_runNumberIncrement (zh->reg->zei, 0))
+            {
+                logf (LOG_LOG, "skipped %s %s %ld", recordType,
+                      pr_fname, (long) recordOffset);
+                extract_flushSortKeys (zh, *sysno, -1, &zh->reg->sortKeys);
+                rec_rm (&rec);
+                logRecord(zh);
+                return 0;
+            }
+        }
+
+        /* keys stored with the existing record */
+        delkeys.buf_used = rec->size[recInfo_delKeys];
+        delkeys.buf = rec->info[recInfo_delKeys];
+
+        sortKeys.buf_used = rec->size[recInfo_sortKeys];
+        sortKeys.buf = rec->info[recInfo_sortKeys];
+
+        /* queue removal (cmd 0) of the previously stored keys */
+        extract_flushSortKeys (zh, *sysno, 0, &sortKeys);
+        extract_flushRecordKeys (zh, *sysno, 0, &delkeys);
+        if (delete_flag)
+        {
+            /* record going to be deleted */
+            if (!delkeys.buf_used)
+            {
+                logf (LOG_LOG, "delete %s %s %ld", recordType,
+                      pr_fname, (long) recordOffset);
+                logf (LOG_WARN, "cannot delete file above, storeKeys false");
+            }
+            else
+            {
+                logf (LOG_LOG, "delete %s %s %ld", recordType,
+                      pr_fname, (long) recordOffset);
+                zh->records_deleted++;
+                if (matchStr)
+                    dict_delete (zh->reg->matchDict, matchStr);
+                rec_del (zh->reg->records, &rec);
+            }
+            /* NOTE(review): rec_rm follows rec_del on the same handle;
+               presumably rec_del clears rec - confirm */
+            rec_rm (&rec);
+            logRecord(zh);
+            return 0;
+        }
+        else
+        {
+            /* record going to be updated */
+            if (!delkeys.buf_used)
+            {
+                logf (LOG_LOG, "update %s %s %ld", recordType,
+                      pr_fname, (long) recordOffset);
+                logf (LOG_WARN, "cannot update file above, storeKeys false");
+            }
+            else
+            {
+                logf (LOG_LOG, "update %s %s %ld", recordType,
+                      pr_fname, (long) recordOffset);
+                extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys);
+                extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys);
+                zh->records_updated++;
+            }
+        }
+    }
+    /* update file type */
+    xfree (rec->info[recInfo_fileType]);
+    rec->info[recInfo_fileType] =
+        rec_strdup (recordType, &rec->size[recInfo_fileType]);
+
+    /* update filename */
+    xfree (rec->info[recInfo_filename]);
+    rec->info[recInfo_filename] =
+        rec_strdup (fname, &rec->size[recInfo_filename]);
+
+    /* update delete keys: the key buffer is handed over to the record */
+    xfree (rec->info[recInfo_delKeys]);
+    if (zh->reg->keys.buf_used > 0 && zh->m_store_keys == 1)
+    {
+        rec->size[recInfo_delKeys] = zh->reg->keys.buf_used;
+        rec->info[recInfo_delKeys] = zh->reg->keys.buf;
+        zh->reg->keys.buf = NULL;
+        zh->reg->keys.buf_max = 0;
+    }
+    else
+    {
+        rec->info[recInfo_delKeys] = NULL;
+        rec->size[recInfo_delKeys] = 0;
+    }
+
+    /* update sort keys: the sort key buffer is handed over as well */
+    xfree (rec->info[recInfo_sortKeys]);
+
+    rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
+    rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
+    zh->reg->sortKeys.buf = NULL;
+    zh->reg->sortKeys.buf_max = 0;
+
+    /* replace the old record size by the new one in the byte count */
+    zebraExplain_recordBytesIncrement (zh->reg->zei,
+                                       - recordAttr->recordSize);
+    recordAttr->recordSize = buf_size;
+    zebraExplain_recordBytesIncrement (zh->reg->zei,
+                                       recordAttr->recordSize);
+
+    /* set run-number for this record */
+    recordAttr->runNumber =
+        zebraExplain_runNumberIncrement (zh->reg->zei, 0);
+
+    /* update store data */
+    xfree (rec->info[recInfo_storeData]);
+    if (zh->m_store_data)
+    {
+        rec->size[recInfo_storeData] = recordAttr->recordSize;
+        rec->info[recInfo_storeData] = (char *)
+            xmalloc (recordAttr->recordSize);
+        memcpy (rec->info[recInfo_storeData], buf, recordAttr->recordSize);
+    }
+    else
+    {
+        rec->info[recInfo_storeData] = NULL;
+        rec->size[recInfo_storeData] = 0;
+    }
+    /* update database name */
+    xfree (rec->info[recInfo_databaseName]);
+    rec->info[recInfo_databaseName] =
+        rec_strdup (zh->basenames[0], &rec->size[recInfo_databaseName]);
+
+    /* update offset */
+    recordAttr->recordOffset = recordOffset;
+
+    /* commit this record */
+    rec_put (zh->reg->records, &rec);
+    logRecord(zh);
+    return 0;
+}
+
+/*
+ * explain_extract: re-index the record rec from the data1 tree n.
+ *
+ * Keys are extracted from n (when n is non-NULL) into zh->reg->keys and
+ * zh->reg->sortKeys. Keys previously stored with the record are queued
+ * for removal, the new ones are queued for insertion, and the freshly
+ * filled buffers are then handed over to the record.
+ *
+ * handle is the ZebraHandle passed as an opaque pointer.
+ * Always returns 0; aborts if the record's database cannot be selected.
+ */
+int explain_extract (void *handle, Record rec, data1_node *n)
+{
+    ZebraHandle zh = (ZebraHandle) handle;
+    ZebraExplainInfo zei = zh->reg->zei;
+    struct recExtractCtrl ctrl;
+    int pos;
+
+    if (zebraExplain_curDatabase (zei, rec->info[recInfo_databaseName]))
+    {
+        /* NOTE(review): this unconditional abort makes the fallback
+           below unreachable - confirm which behaviour is intended */
+        abort();
+        if (zebraExplain_newDatabase (zei,
+                                      rec->info[recInfo_databaseName], 0))
+            abort ();
+    }
+
+    /* begin with empty key buffers */
+    zh->reg->keys.buf_used = 0;
+    zh->reg->keys.prevAttrUse = -1;
+    zh->reg->keys.prevAttrSet = -1;
+    zh->reg->keys.prevSeqNo = 0;
+    zh->reg->sortKeys.buf_used = 0;
+
+    /* set up the extraction callbacks */
+    ctrl.init = extract_init;
+    ctrl.tokenAdd = extract_token_add;
+    ctrl.schemaAdd = extract_schema_add;
+    ctrl.dh = zh->reg->dh;
+    for (pos = 255; pos >= 0; pos--)
+        ctrl.seqno[pos] = 0;
+    ctrl.zebra_maps = zh->reg->zebra_maps;
+    ctrl.flagShowRecords = 0;
+    ctrl.handle = handle;
+
+    if (n != NULL)
+        grs_extract_tree(&ctrl, n);
+
+    if (rec->size[recInfo_delKeys] != 0)
+    {
+        /* queue removal (cmd 0) of the keys stored with the record */
+        struct sortKeys oldSortKeys;
+        struct recKeys oldKeys;
+
+        oldSortKeys.buf_used = rec->size[recInfo_sortKeys];
+        oldSortKeys.buf = rec->info[recInfo_sortKeys];
+        oldKeys.buf_used = rec->size[recInfo_delKeys];
+        oldKeys.buf = rec->info[recInfo_delKeys];
+
+        extract_flushSortKeys (zh, rec->sysno, 0, &oldSortKeys);
+        extract_flushRecordKeys (zh, rec->sysno, 0, &oldKeys);
+    }
+
+    /* queue insertion (cmd 1) of the newly extracted keys */
+    extract_flushRecordKeys (zh, rec->sysno, 1, &zh->reg->keys);
+    extract_flushSortKeys (zh, rec->sysno, 1, &zh->reg->sortKeys);
+
+    /* the record takes over the key buffer ... */
+    xfree (rec->info[recInfo_delKeys]);
+    rec->info[recInfo_delKeys] = zh->reg->keys.buf;
+    rec->size[recInfo_delKeys] = zh->reg->keys.buf_used;
+    zh->reg->keys.buf_max = 0;
+    zh->reg->keys.buf = NULL;
+
+    /* ... and the sort key buffer */
+    xfree (rec->info[recInfo_sortKeys]);
+    rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf;
+    rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used;
+    zh->reg->sortKeys.buf_max = 0;
+    zh->reg->sortKeys.buf = NULL;
+
+    return 0;
+}
+
+/*
+ * extract_flushRecordKeys: decode the keys in reckeys and append them to
+ * the in-memory key buffer zh->reg->key_buf.
+ *
+ *   zh       Zebra handle; the key buffer is allocated on first use with
+ *            a size of "memmax" megabytes (resource setting, default 8).
+ *   sysno    record number stored with every key.
+ *   cmd      stored as one byte with every key; callers in this file
+ *            pass 1 for insert and 0 for delete. The record count is
+ *            incremented by 1 when cmd is non-zero, otherwise by -1.
+ *   reckeys  encoded keys (buf, buf_used) to decode.
+ *
+ * Buffer layout: key data grows upwards from the start of key_buf
+ * (key_buf_used bytes) while a pointer to each entry is stored from the
+ * top downwards (slot ptr_top - ptr_i). When less than 1024 bytes of
+ * headroom remain between the two, extract_flushWriteKeys is called.
+ *
+ * Each buffered entry is: encoded SU number, key string, NUL, cmd byte,
+ * struct it_key (sysno, seqno).
+ */
+void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
+                              int cmd, struct recKeys *reckeys)
+{
+#if SU_SCHEME
+#else
+    unsigned char attrSet = (unsigned char) -1;
+    unsigned short attrUse = (unsigned short) -1;
+#endif
+    int seqno = 0;
+    int off = 0;
+    int ch = 0;
+    ZebraExplainInfo zei = zh->reg->zei;
+
+    if (!zh->reg->key_buf)
+    {
+        /* largest megabyte count whose size in bytes still fits an int;
+           checked before multiplying so the product cannot overflow */
+        const int max_mb = (int) ((~0U >> 1) / (1024U * 1024U));
+        int mb = atoi (res_get_def (zh->res, "memmax", "8"));
+        int mem;
+
+        if (mb <= 0 || mb > max_mb)
+        {
+            logf(LOG_WARN, "Invalid memory setting, using default 8 MB");
+            mb = 8;
+        }
+        mem = 1024*1024*mb;
+        /* FIXME: That "8" should be in a default settings include */
+        /* not hard-coded here! -H */
+        zh->reg->key_buf = (char**) xmalloc (mem);
+        zh->reg->ptr_top = mem/sizeof(char*);
+        zh->reg->ptr_i = 0;
+        zh->reg->key_buf_used = 0;
+        zh->reg->key_file_no = 0;
+    }
+    zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1);
+    while (off < reckeys->buf_used)
+    {
+        const char *src = reckeys->buf + off;
+        struct it_key key;
+        int lead;
+
+        /* the lead byte tells which fields follow for this key */
+        lead = *src++;
+
+#if SU_SCHEME
+        if ((lead & 3) < 3)
+        {
+            memcpy (&ch, src, sizeof(ch));
+            src += sizeof(ch);
+        }
+#else
+        /* bit 0 clear: an attribute set follows, else the previous one
+           is reused */
+        if (!(lead & 1))
+        {
+            memcpy (&attrSet, src, sizeof(attrSet));
+            src += sizeof(attrSet);
+        }
+        /* bit 1 clear: an attribute use follows, else the previous one
+           is reused */
+        if (!(lead & 2))
+        {
+            memcpy (&attrUse, src, sizeof(attrUse));
+            src += sizeof(attrUse);
+        }
+#endif
+        /* flush when data and pointer area are about to meet */
+        if (zh->reg->key_buf_used + 1024 >
+            (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*))
+            extract_flushWriteKeys (zh,0);
+        /* record where this entry starts */
+        ++(zh->reg->ptr_i);
+        (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] =
+            (char*)zh->reg->key_buf + zh->reg->key_buf_used;
+#if SU_SCHEME
+#else
+        /* map (attrSet, attrUse) to its SU number, adding it if new */
+        ch = zebraExplain_lookupSU (zei, attrSet, attrUse);
+        if (ch < 0)
+            ch = zebraExplain_addSU (zei, attrSet, attrUse);
+#endif
+        assert (ch > 0);
+        zh->reg->key_buf_used +=
+            key_SU_encode (ch,((char*)zh->reg->key_buf) +
+                           zh->reg->key_buf_used);
+
+        /* copy the NUL-terminated key string, then the cmd byte */
+        while (*src)
+            ((char*)zh->reg->key_buf) [(zh->reg->key_buf_used)++] = *src++;
+        src++;
+        ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = '\0';
+        ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = cmd;
+
+        /* bits 2-5 non-zero: seqno is a small delta from the previous
+           one; otherwise the full seqno follows the string */
+        if (lead & 60)
+            seqno += ((lead>>2) & 15)-1;
+        else
+        {
+            memcpy (&seqno, src, sizeof(seqno));
+            src += sizeof(seqno);
+        }
+        key.seqno = seqno;
+        key.sysno = sysno;
+        memcpy ((char*)zh->reg->key_buf + zh->reg->key_buf_used, &key, sizeof(key));
+        (zh->reg->key_buf_used) += sizeof(key);
+        off = src - reckeys->buf;
+    }
+    assert (off == reckeys->buf_used);
+}
+
+void extract_flushWriteKeys (ZebraHandle zh, int final)
+ /* optimizing: if final=1, and no files written yet */
+ /* push the keys directly to merge, sidestepping the */
+ /* temp file altogether. Speeds small updates */
+{
+ FILE *outf;
+ char out_fname[200];
+ char *prevcp, *cp;
+ struct encode_info encode_info;
+ int ptr_i = zh->reg->ptr_i;
+ int temp_policy;
+#if SORT_EXTRA
+ int i;
+#endif
+ if (!zh->reg->key_buf || ptr_i <= 0)
+ {
+ logf (LOG_DEBUG, " nothing to flush section=%d buf=%p i=%d",
+ zh->reg->key_file_no, zh->reg->key_buf, ptr_i);
+ logf (LOG_DEBUG, " buf=%p ",
+ zh->reg->key_buf);
+ logf (LOG_DEBUG, " ptr=%d ",zh->reg->ptr_i);
+ logf (LOG_DEBUG, " reg=%p ",zh->reg);
+