X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=836b6de7a984e6de3cb25495b7e8e1b32a780b8d;hb=593927cb1897c1e3163c284448eff7fee6ddad51;hp=59822b9b0bb9a13ef1b9962c74291a2cd0c53ab3;hpb=9eebf93dc2525854867cbc43920ea8ba4a199ab5;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 59822b9..836b6de 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.187 2005-06-23 06:45:46 adam Exp $ +/* $Id: extract.c,v 1.196 2005-10-28 07:25:30 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -98,42 +98,79 @@ static void extract_init (struct recExtractCtrl *p, RecWord *w) w->section_id = 0; } -static const char **searchRecordKey (ZebraHandle zh, - struct recKeys *reckeys, - int attrSetS, int attrUseS) +static void searchRecordKey(ZebraHandle zh, +#if NEW_REC_KEYS + zebra_rec_keys_t reckeys, +#else + const struct recKeys *reckeys, +#endif + int attrSetS, int attrUseS, + const char **ws, int ws_length) { - static const char *ws[32]; +#if NEW_REC_KEYS +#else void *decode_handle = iscz1_start(); int off = 0; int startSeq = -1; int seqno = 0; +#endif int i; + int ch; - for (i = 0; i<32; i++) + for (i = 0; i<ws_length; i++) ws[i] = NULL; + ch = zebraExplain_lookup_attr_su_any_index(zh->reg->zei, + attrSetS, attrUseS); + if (ch < 0) + return ; + +#if NEW_REC_KEYS + if (zebra_rec_keys_rewind(reckeys)) + { + int startSeq = -1; + const char *str; + size_t slen; + struct it_key key; + zint seqno; + while (zebra_rec_keys_read(reckeys, &str, &slen, &key)) + { + assert(key.len <= 4 && key.len > 2); + + seqno = key.mem[key.len-1]; + + if (key.mem[0] == ch) + { + int woff; + + if (startSeq == -1) + startSeq = seqno; + woff = seqno - startSeq; + if (woff >= 0 && woff < ws_length) + ws[woff] = str; + } + } + } +#else while (off < reckeys->buf_used) { const char *src = reckeys->buf + off; struct it_key key; char *dst = (char*) &key; - int attrSet, attrUse; iscz1_decode(decode_handle, &dst, &src); assert(key.len <= 4 && key.len > 2); - attrSet = (int) key.mem[0] >> 16; - attrUse = (int) 
key.mem[0] & 65535; seqno = (int) key.mem[key.len-1]; - if (attrUseS == attrUse && attrSetS == attrSet) + if (key.mem[0] == ch) { int woff; if (startSeq == -1) startSeq = seqno; woff = seqno - startSeq; - if (woff >= 0 && woff < 31) + if (woff >= 0 && woff < ws_length) ws[woff] = src; } @@ -143,7 +180,7 @@ static const char **searchRecordKey (ZebraHandle zh, } iscz1_stop(decode_handle); assert (off == reckeys->buf_used); - return ws; +#endif } struct file_read_info { @@ -212,13 +249,16 @@ static void file_end (void *handle, off_t offset) } static char *fileMatchStr (ZebraHandle zh, +#if NEW_REC_KEYS + zebra_rec_keys_t reckeys, +#else struct recKeys *reckeys, +#endif const char *fname, const char *spec) { static char dstBuf[2048]; /* static here ??? */ char *dst = dstBuf; const char *s = spec; - static const char **w; while (1) { @@ -228,6 +268,7 @@ static char *fileMatchStr (ZebraHandle zh, break; if (*s == '(') { + const char *ws[32]; char attset_str[64], attname_str[64]; data1_attset *attset; int i; @@ -260,8 +301,7 @@ static char *fileMatchStr (ZebraHandle zh, else attUse = atoi (attname_str); } - w = searchRecordKey (zh, reckeys, attSet, attUse); - assert (w); + searchRecordKey (zh, reckeys, attSet, attUse, ws, 32); if (*s == ')') { @@ -277,15 +317,15 @@ static char *fileMatchStr (ZebraHandle zh, s++; for (i = 0; i<32; i++) - if (matchFlag[i] && w[i]) + if (matchFlag[i] && ws[i]) { if (first) { *dst++ = ' '; first = 0; } - strcpy (dst, w[i]); - dst += strlen(w[i]); + strcpy (dst, ws[i]); + dst += strlen(ws[i]); } if (first) { @@ -368,11 +408,14 @@ struct recordLogInfo { struct recordGroup *rGroup; }; +#if NEW_REC_KEYS +#else void create_rec_keys_codec(struct recKeys *keys) { keys->buf_used = 0; iscz1_reset(keys->codec_handle); } +#endif static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl) { @@ -402,6 +445,7 @@ static int file_extract_record(ZebraHandle zh, SYSNO sysnotmp; Record rec; off_t recordOffset = 0; + struct recExtractCtrl 
extractCtrl; /* announce database */ if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0])) @@ -413,12 +457,19 @@ static int file_extract_record(ZebraHandle zh, if (fi->fd != -1) { - struct recExtractCtrl extractCtrl; - /* we are going to read from a file, so prepare the extraction */ +#if NEW_REC_KEYS + zebra_rec_keys_reset(zh->reg->keys); +#else create_rec_keys_codec(&zh->reg->keys); +#endif + #if NATTR +#if NEW_REC_KEYS + zebra_rec_keys_reset(zh->reg->sortKeys); +#else create_rec_keys_codec(&zh->reg->sortKeys); +#endif #else zh->reg->sortKeys.buf_used = 0; #endif @@ -435,6 +486,8 @@ static int file_extract_record(ZebraHandle zh, extractCtrl.schemaAdd = extract_schema_add; extractCtrl.dh = zh->reg->dh; extractCtrl.match_criteria[0] = '\0'; + extractCtrl.staticrank = 0; + extractCtrl.first_record = fi->file_offset ? 0 : 1; extract_set_store_data_prepare(&extractCtrl); @@ -479,7 +532,7 @@ static int file_extract_record(ZebraHandle zh, return 0; } if (extractCtrl.match_criteria[0]) - matchStr = extractCtrl.match_criteria; + matchStr = extractCtrl.match_criteria; } /* perform match if sysno not known and if match criteria is specified */ @@ -490,9 +543,13 @@ static int file_extract_record(ZebraHandle zh, if (matchStr == 0 && zh->m_record_id && *zh->m_record_id) { - +#if NEW_REC_KEYS + matchStr = fileMatchStr (zh, zh->reg->keys, fname, + zh->m_record_id); +#else matchStr = fileMatchStr (zh, &zh->reg->keys, fname, zh->m_record_id); +#endif if (!matchStr) { yaz_log(YLOG_WARN, "Bad match criteria"); @@ -509,7 +566,13 @@ static int file_extract_record(ZebraHandle zh, } } } - if (! *sysno && zh->reg->keys.buf_used == 0) + if (! 
*sysno +#if NEW_REC_KEYS + && zebra_rec_keys_empty(zh->reg->keys) +#else + && zh->reg->keys.buf_used == 0 +#endif + ) { /* the extraction process returned no information - the record is probably empty - unless flagShowRecords is in use */ @@ -540,22 +603,37 @@ static int file_extract_record(ZebraHandle zh, *sysno = rec->sysno; recordAttr = rec_init_attr (zh->reg->zei, rec); + recordAttr->staticrank = extractCtrl.staticrank; if (matchStr) { dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno); } extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); - extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys); - +#if NEW_REC_KEYS + extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, + recordAttr->staticrank); +#else + extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys, + recordAttr->staticrank); +#endif zh->records_inserted++; } else { /* record already exists */ +#if NEW_REC_KEYS + zebra_rec_keys_t delkeys = zebra_rec_keys_open(); + +#else struct recKeys delkeys; +#endif #if NATTR +#if NEW_REC_KEYS + zebra_rec_keys_t sortKeys = zebra_rec_keys_open(); +#else struct recKeys sortKeys; +#endif #else struct sortKeys sortKeys; #endif @@ -565,40 +643,56 @@ static int file_extract_record(ZebraHandle zh, recordAttr = rec_init_attr (zh->reg->zei, rec); - if (!force_update && recordAttr->runNumber == - zebraExplain_runNumberIncrement (zh->reg->zei, 0)) - { - yaz_log (YLOG_LOG, "run number = " ZINT_FORMAT, - recordAttr->runNumber); - yaz_log (YLOG_LOG, "skipped %s %s " PRINTF_OFF_T, - zh->m_record_type, fname, recordOffset); - extract_flushSortKeys (zh, *sysno, -1, &zh->reg->sortKeys); - rec_rm (&rec); - logRecord (zh); - return 1; - } +#if NEW_REC_KEYS + zebra_rec_keys_set_buf(delkeys, + rec->info[recInfo_delKeys], + rec->size[recInfo_delKeys], + 0); +#else + /* flush old keys for sort&search etc. 
*/ delkeys.buf_used = rec->size[recInfo_delKeys]; delkeys.buf = rec->info[recInfo_delKeys]; +#endif +#if NEW_REC_KEYS && NATTR + zebra_rec_keys_set_buf(sortKeys, + rec->info[recInfo_sortKeys], + rec->size[recInfo_sortKeys], + 0); + extract_flushSortKeys (zh, *sysno, 0, sortKeys); +#else sortKeys.buf_used = rec->size[recInfo_sortKeys]; sortKeys.buf = rec->info[recInfo_sortKeys]; - extract_flushSortKeys (zh, *sysno, 0, &sortKeys); - extract_flushRecordKeys (zh, *sysno, 0, &delkeys); +#endif + +#if NEW_REC_KEYS + extract_flushRecordKeys (zh, *sysno, 0, delkeys, + recordAttr->staticrank); /* old values */ +#else + extract_flushRecordKeys (zh, *sysno, 0, &delkeys, + recordAttr->staticrank); /* old values */ +#endif if (deleteFlag) { /* record going to be deleted */ - if (!delkeys.buf_used) + if ( +#if NEW_REC_KEYS + zebra_rec_keys_empty(delkeys) +#else + !delkeys.buf_used +#endif + ) { yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T, - zh->m_record_type, fname, recordOffset); + zh->m_record_type, fname, recordOffset); yaz_log (YLOG_WARN, "cannot delete file above, storeKeys false"); } else { if (zh->records_processed < zh->m_file_verbose_limit) yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T, - zh->m_record_type, fname, recordOffset); + zh->m_record_type, fname, recordOffset); zh->records_deleted++; if (matchStr) dict_delete (zh->reg->matchDict, matchStr); @@ -611,7 +705,13 @@ static int file_extract_record(ZebraHandle zh, else { /* record going to be updated */ - if (!delkeys.buf_used) + if ( +#if NEW_REC_KEYS + zebra_rec_keys_empty(delkeys) +#else + !delkeys.buf_used +#endif + ) { yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T, zh->m_record_type, fname, recordOffset); @@ -619,14 +719,28 @@ static int file_extract_record(ZebraHandle zh, } else { + /* flush new keys for sort&search etc */ if (zh->records_processed < zh->m_file_verbose_limit) yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T, zh->m_record_type, fname, recordOffset); + recordAttr->staticrank = 
extractCtrl.staticrank; extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); - extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys); +#if NEW_REC_KEYS + extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, + recordAttr->staticrank); +#else + extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys, + recordAttr->staticrank); +#endif zh->records_updated++; } } +#if NEW_REC_KEYS + zebra_rec_keys_close(delkeys); +#endif +#if NATTR && NEW_REC_KEYS + zebra_rec_keys_close(sortKeys); +#endif } /* update file type */ xfree (rec->info[recInfo_fileType]); @@ -640,12 +754,25 @@ static int file_extract_record(ZebraHandle zh, /* update delete keys */ xfree (rec->info[recInfo_delKeys]); - if (zh->reg->keys.buf_used > 0 && zh->m_store_keys == 1) + if ( +#if NEW_REC_KEYS + !zebra_rec_keys_empty(zh->reg->keys) +#else + zh->reg->keys.buf_used > 0 +#endif + && zh->m_store_keys == 1) { +#if NEW_REC_KEYS + zebra_rec_keys_get_buf(zh->reg->keys, + &rec->info[recInfo_delKeys], + &rec->size[recInfo_delKeys]); +#else + rec->size[recInfo_delKeys] = zh->reg->keys.buf_used; rec->info[recInfo_delKeys] = zh->reg->keys.buf; zh->reg->keys.buf = NULL; zh->reg->keys.buf_max = 0; +#endif } else { @@ -834,16 +961,16 @@ int fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname, If not, and a record is provided, then sysno is got from there */ -ZEBRA_RES buffer_extract_record (ZebraHandle zh, - const char *buf, size_t buf_size, - int delete_flag, - int test_mode, - const char *recordType, - SYSNO *sysno, - const char *match_criteria, - const char *fname, - int force_update, - int allow_update) +ZEBRA_RES buffer_extract_record(ZebraHandle zh, + const char *buf, size_t buf_size, + int delete_flag, + int test_mode, + const char *recordType, + SYSNO *sysno, + const char *match_criteria, + const char *fname, + int force_update, + int allow_update) { RecordAttr *recordAttr; struct recExtractCtrl extractCtrl; @@ -875,9 +1002,17 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh, 
extractCtrl.first_record = 1; extractCtrl.fh = &fc; +#if NEW_REC_KEYS + zebra_rec_keys_reset(zh->reg->keys); +#else create_rec_keys_codec(&zh->reg->keys); +#endif #if NATTR +#if NEW_REC_KEYS + zebra_rec_keys_reset(zh->reg->sortKeys); +#else create_rec_keys_codec(&zh->reg->sortKeys); +#endif #else zh->reg->sortKeys.buf_used = 0; #endif @@ -919,6 +1054,7 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh, extractCtrl.dh = zh->reg->dh; extractCtrl.handle = zh; extractCtrl.match_criteria[0] = '\0'; + extractCtrl.staticrank = 0; init_extractCtrl(zh, &extractCtrl); @@ -952,8 +1088,13 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh, matchStr = match_criteria; } else { if (zh->m_record_id && *zh->m_record_id) { +#if NEW_REC_KEYS + matchStr = fileMatchStr (zh, zh->reg->keys, pr_fname, + zh->m_record_id); +#else matchStr = fileMatchStr (zh, &zh->reg->keys, pr_fname, zh->m_record_id); +#endif if (!matchStr) { yaz_log (YLOG_WARN, "Bad match criteria (recordID)"); @@ -970,7 +1111,13 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh, } } } - if (zh->reg->keys.buf_used == 0) + if ( +#if NEW_REC_KEYS + zebra_rec_keys_empty(zh->reg->keys) +#else + zh->reg->keys.buf_used == 0 +#endif +) { /* the extraction process returned no information - the record is probably empty - unless flagShowRecords is in use */ @@ -997,6 +1144,7 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh, *sysno = rec->sysno; recordAttr = rec_init_attr (zh->reg->zei, rec); + recordAttr->staticrank = extractCtrl.staticrank; if (matchStr) { @@ -1004,14 +1152,31 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh, sizeof(*sysno), sysno); } extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); - extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys); + +#if NEW_REC_KEYS +#if 0 + print_rec_keys(zh, zh->reg->keys); +#endif + extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, + recordAttr->staticrank); +#else +#if 0 + print_rec_keys(zh, &zh->reg->keys); +#endif + extract_flushRecordKeys (zh, *sysno, 1, 
&zh->reg->keys, + recordAttr->staticrank); +#endif zh->records_inserted++; } else { /* record already exists */ +#if NEW_REC_KEYS + zebra_rec_keys_t delkeys = zebra_rec_keys_open(); +#else struct recKeys delkeys; +#endif #if NATTR struct recKeys sortKeys; #else @@ -1031,33 +1196,41 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh, assert (rec); recordAttr = rec_init_attr (zh->reg->zei, rec); - - if (!force_update) { - if (recordAttr->runNumber == - zebraExplain_runNumberIncrement (zh->reg->zei, 0)) - { - if (show_progress) - yaz_log (YLOG_LOG, "skipped %s %s %ld", recordType, - pr_fname, (long) recordOffset); - extract_flushSortKeys (zh, *sysno, -1, &zh->reg->sortKeys); - rec_rm (&rec); - logRecord(zh); - return ZEBRA_FAIL; - } - } +#if NEW_REC_KEYS + zebra_rec_keys_set_buf(delkeys, + rec->info[recInfo_delKeys], + rec->size[recInfo_delKeys], + 0); +#else delkeys.buf_used = rec->size[recInfo_delKeys]; delkeys.buf = rec->info[recInfo_delKeys]; - +#endif sortKeys.buf_used = rec->size[recInfo_sortKeys]; sortKeys.buf = rec->info[recInfo_sortKeys]; +#if NEW_REC_KEYS && NATTR + extract_flushSortKeys (zh, *sysno, 0, sortKeys); +#else extract_flushSortKeys (zh, *sysno, 0, &sortKeys); - extract_flushRecordKeys (zh, *sysno, 0, &delkeys); +#endif +#if NEW_REC_KEYS + extract_flushRecordKeys (zh, *sysno, 0, delkeys, + recordAttr->staticrank); +#else + extract_flushRecordKeys (zh, *sysno, 0, &delkeys, + recordAttr->staticrank); +#endif if (delete_flag) { /* record going to be deleted */ - if (!delkeys.buf_used) + if ( +#if NEW_REC_KEYS + zebra_rec_keys_empty(delkeys) +#else + !delkeys.buf_used +#endif + ) { if (show_progress) { @@ -1084,7 +1257,13 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh, else { /* record going to be updated */ - if (!delkeys.buf_used) + if ( +#if NEW_REC_KEYS + zebra_rec_keys_empty(delkeys) +#else + !delkeys.buf_used +#endif + ) { if (show_progress) { @@ -1098,11 +1277,24 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh, if (show_progress) yaz_log 
(YLOG_LOG, "update %s %s %ld", recordType, pr_fname, (long) recordOffset); + recordAttr->staticrank = extractCtrl.staticrank; extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); - extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys); +#if NEW_REC_KEYS + extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, + recordAttr->staticrank); +#else + extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys, + recordAttr->staticrank); +#endif zh->records_updated++; } } +#if NEW_REC_KEYS + zebra_rec_keys_close(delkeys); +#endif +#if NEW_REC_KEYS && NATTR + zebra_rec_keys_close(sortKeys); +#endif } /* update file type */ xfree (rec->info[recInfo_fileType]); @@ -1116,19 +1308,31 @@ ZEBRA_RES buffer_extract_record (ZebraHandle zh, /* update delete keys */ xfree (rec->info[recInfo_delKeys]); - if (zh->reg->keys.buf_used > 0 && zh->m_store_keys == 1) + if ( +#if NEW_REC_KEYS + !zebra_rec_keys_empty(zh->reg->keys) +#else + zh->reg->keys.buf_used > 0 +#endif + && zh->m_store_keys == 1) { +#if NEW_REC_KEYS + zebra_rec_keys_get_buf(zh->reg->keys, + &rec->info[recInfo_delKeys], + &rec->size[recInfo_delKeys]); +#else + rec->size[recInfo_delKeys] = zh->reg->keys.buf_used; rec->info[recInfo_delKeys] = zh->reg->keys.buf; zh->reg->keys.buf = NULL; zh->reg->keys.buf_max = 0; +#endif } else { rec->info[recInfo_delKeys] = NULL; rec->size[recInfo_delKeys] = 0; } - /* update sort keys */ xfree (rec->info[recInfo_sortKeys]); @@ -1204,9 +1408,18 @@ int explain_extract (void *handle, Record rec, data1_node *n) abort (); } +#if NEW_REC_KEYS + zebra_rec_keys_reset(zh->reg->keys); +#else create_rec_keys_codec(&zh->reg->keys); +#endif + #if NATTR +#if NEW_REC_KEYS + zebra_rec_keys_reset(zh->reg->sortKeys); +#else create_rec_keys_codec(&zh->reg->sortKeys); +#endif #else zh->reg->sortKeys.buf_used = 0; #endif @@ -1219,6 +1432,7 @@ int explain_extract (void *handle, Record rec, data1_node *n) extractCtrl.flagShowRecords = 0; extractCtrl.match_criteria[0] = '\0'; + extractCtrl.staticrank = 0; 
extractCtrl.handle = handle; extractCtrl.first_record = 1; @@ -1229,30 +1443,68 @@ int explain_extract (void *handle, Record rec, data1_node *n) if (rec->size[recInfo_delKeys]) { +#if NEW_REC_KEYS + zebra_rec_keys_t delkeys = zebra_rec_keys_open(); +#else struct recKeys delkeys; +#endif + #if NATTR +#if NEW_REC_KEYS + zebra_rec_keys_t sortkeys = zebra_rec_keys_open(); +#else struct recKeys sortkeys; +#endif #else struct sortKeys sortkeys; #endif +#if NEW_REC_KEYS + zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys], + rec->size[recInfo_delKeys], + 0); + extract_flushRecordKeys (zh, rec->sysno, 0, delkeys, 0); + zebra_rec_keys_close(delkeys); +#else delkeys.buf_used = rec->size[recInfo_delKeys]; delkeys.buf = rec->info[recInfo_delKeys]; + extract_flushRecordKeys (zh, rec->sysno, 0, &delkeys, 0); +#endif +#if NATTR && NEW_REC_KEYS + zebra_rec_keys_set_buf(sortkeys, rec->info[recInfo_sortKeys], + rec->size[recInfo_sortKeys], + 0); + extract_flushSortKeys (zh, rec->sysno, 0, sortkeys); + zebra_rec_keys_close(sortkeys); +#else sortkeys.buf_used = rec->size[recInfo_sortKeys]; sortkeys.buf = rec->info[recInfo_sortKeys]; - extract_flushSortKeys (zh, rec->sysno, 0, &sortkeys); - extract_flushRecordKeys (zh, rec->sysno, 0, &delkeys); +#endif } - extract_flushRecordKeys (zh, rec->sysno, 1, &zh->reg->keys); +#if NEW_REC_KEYS + extract_flushRecordKeys (zh, rec->sysno, 1, zh->reg->keys, 0); +#else + extract_flushRecordKeys (zh, rec->sysno, 1, &zh->reg->keys, 0); +#endif +#if NATTR && NEW_REC_KEYS + extract_flushSortKeys (zh, rec->sysno, 1, zh->reg->sortKeys); +#else extract_flushSortKeys (zh, rec->sysno, 1, &zh->reg->sortKeys); +#endif xfree (rec->info[recInfo_delKeys]); +#if NEW_REC_KEYS + zebra_rec_keys_get_buf(zh->reg->keys, + &rec->info[recInfo_delKeys], + &rec->size[recInfo_delKeys]); +#else rec->size[recInfo_delKeys] = zh->reg->keys.buf_used; rec->info[recInfo_delKeys] = zh->reg->keys.buf; zh->reg->keys.buf = NULL; zh->reg->keys.buf_max = 0; +#endif xfree 
(rec->info[recInfo_sortKeys]); rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; @@ -1264,11 +1516,20 @@ int explain_extract (void *handle, Record rec, data1_node *n) } void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, - int cmd, struct recKeys *reckeys) + int cmd, +#if NEW_REC_KEYS + zebra_rec_keys_t reckeys, +#else + struct recKeys *reckeys, +#endif + zint staticrank) { +#if NEW_REC_KEYS +#else void *decode_handle = iscz1_start(); int off = 0; int ch = 0; +#endif ZebraExplainInfo zei = zh->reg->zei; if (!zh->reg->key_buf) @@ -1289,49 +1550,118 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, } zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1); +#if NEW_REC_KEYS + if (zebra_rec_keys_rewind(reckeys)) + { + size_t slen; + const char *str; + struct it_key key_in; + while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) + { + int ch = 0; + struct it_key key_out; + zint *keyp = key_out.mem; + + assert(key_in.len == 4); + + /* check for buffer overflow */ + if (zh->reg->key_buf_used + 1024 > + (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*)) + extract_flushWriteKeys (zh, 0); + + ++(zh->reg->ptr_i); + assert(zh->reg->ptr_i > 0); + (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] = + (char*)zh->reg->key_buf + zh->reg->key_buf_used; + + /* encode the ordinal value (field/use/attribute) .. */ + ch = (int) key_in.mem[0]; + zh->reg->key_buf_used += + key_SU_encode(ch, (char*)zh->reg->key_buf + + zh->reg->key_buf_used); + + /* copy the 0-terminated stuff from str to output */ + memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, str, slen); + zh->reg->key_buf_used += slen; + ((char*)zh->reg->key_buf)[(zh->reg->key_buf_used)++] = '\0'; + + /* the delete/insert indicator */ + ((char*)zh->reg->key_buf)[(zh->reg->key_buf_used)++] = cmd; + + if (zh->m_staticrank) /* rank config enabled ? 
*/ + { + *keyp++ = staticrank; + key_out.len = 4; + } + else + key_out.len = 3; + + if (key_in.mem[1]) /* filter specified record ID */ + *keyp++ = key_in.mem[1]; + else + *keyp++ = sysno; + *keyp++ = key_in.mem[2]; /* section_id */ + *keyp++ = key_in.mem[3]; /* sequence .. */ + + memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, + &key_out, sizeof(key_out)); + (zh->reg->key_buf_used) += sizeof(key_out); + } + } +#else while (off < reckeys->buf_used) { const char *src = reckeys->buf + off; - struct it_key key; - char *dst = (char*) &key; + struct it_key key_in; + struct it_key key_out; + char *dst = (char*) &key_in; + zint *keyp = key_out.mem; iscz1_decode(decode_handle, &dst, &src); - assert(key.len == 4); + assert(key_in.len == 4); if (zh->reg->key_buf_used + 1024 > (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*)) - extract_flushWriteKeys (zh,0); + extract_flushWriteKeys(zh, 0); ++(zh->reg->ptr_i); assert(zh->reg->ptr_i > 0); (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] = (char*)zh->reg->key_buf + zh->reg->key_buf_used; - ch = (int) key.mem[0]; /* ordinal for field/use/attribute */ + ch = (int) key_in.mem[0]; /* ordinal for field/use/attribute */ zh->reg->key_buf_used += - key_SU_encode (ch,((char*)zh->reg->key_buf) + - zh->reg->key_buf_used); + key_SU_encode(ch, ((char*)zh->reg->key_buf) + + zh->reg->key_buf_used); while (*src) ((char*)zh->reg->key_buf) [(zh->reg->key_buf_used)++] = *src++; src++; ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = '\0'; ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = cmd; - key.len = 3; - if (key.mem[1]) /* filter specified record ID */ - key.mem[0] = key.mem[1]; + if (zh->m_staticrank) /* rank config enabled ? */ + { + *keyp++ = staticrank; + key_out.len = 4; + } + else + key_out.len = 3; + + if (key_in.mem[1]) /* filter specified record ID */ + *keyp++ = key_in.mem[1]; else - key.mem[0] = sysno; - key.mem[1] = key.mem[2]; /* section_id */ - key.mem[2] = key.mem[3]; /* sequence .. 
*/ + *keyp++ = sysno; + *keyp++ = key_in.mem[2]; /* section_id */ + *keyp++ = key_in.mem[3]; /* sequence .. */ memcpy ((char*)zh->reg->key_buf + zh->reg->key_buf_used, - &key, sizeof(key)); - (zh->reg->key_buf_used) += sizeof(key); + &key_out, sizeof(key_out)); + (zh->reg->key_buf_used) += sizeof(key_out); off = src - reckeys->buf; } assert (off == reckeys->buf_used); iscz1_stop(decode_handle); +#endif } void extract_flushWriteKeys (ZebraHandle zh, int final) @@ -1468,6 +1798,15 @@ void extract_flushWriteKeys (ZebraHandle zh, int final) zh->reg->key_buf_used = 0; } +#if NEW_REC_KEYS +void extract_add_it_key (ZebraHandle zh, + zebra_rec_keys_t *keys, + int reg_type, + const char *str, int slen, struct it_key *key) +{ + zebra_rec_keys_write(*keys, reg_type, str, slen, key); +} +#else void extract_add_it_key (ZebraHandle zh, struct recKeys *keys, int reg_type, @@ -1496,17 +1835,47 @@ void extract_add_it_key (ZebraHandle zh, *dst++ = '\0'; keys->buf_used = dst - keys->buf; } +#endif -ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, struct recKeys *reckeys, +ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, +#if NEW_REC_KEYS + zebra_rec_keys_t reckeys, +#else + struct recKeys *reckeys, +#endif zebra_snippets *snippets) { - void *decode_handle = iscz1_start(); - int off = 0; - int seqno = 0; NMEM nmem = nmem_create(); - yaz_log(YLOG_LOG, "zebra_rec_keys_snippets buf=%p sz=%d", reckeys->buf, - reckeys->buf_used); +#if NEW_REC_KEYS + if (zebra_rec_keys_rewind(reckeys)) + { + const char *str; + size_t slen; + struct it_key key; + while (zebra_rec_keys_read(reckeys, &str, &slen, &key)) + { + char dst_buf[IT_MAX_WORD]; + char *dst_term = dst_buf; + int ord, seqno; + int index_type; + assert(key.len <= 4 && key.len > 2); + seqno = (int) key.mem[key.len-1]; + ord = key.mem[0]; + + zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, + 0/* db */, 0/* set */, 0/* use */); + assert(index_type); + zebra_term_untrans_iconv(zh, nmem, index_type, + &dst_term, str); + 
zebra_snippets_append(snippets, seqno, ord, dst_term); + nmem_reset(nmem); + } + } +#else + int off = 0; + int seqno = 0; + void *decode_handle = iscz1_start(); assert(reckeys->buf); while (off < reckeys->buf_used) { @@ -1515,31 +1884,69 @@ ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, struct recKeys *reckeys, char *dst = (char*) &key; char dst_buf[IT_MAX_WORD]; char *dst_term = dst_buf; + int index_type = 0, ord; iscz1_decode(decode_handle, &dst, &src); assert(key.len <= 4 && key.len > 2); seqno = (int) key.mem[key.len-1]; - - zebra_term_untrans_iconv(zh, nmem, src[0], &dst_term, src+1); - zebra_snippets_append(snippets, seqno, src[0], key.mem[0], dst_term); + ord = key.mem[0]; + + zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, + 0/* db */, 0/* set */, 0/* use */); + assert(index_type); + zebra_term_untrans_iconv(zh, nmem, index_type, + &dst_term, src); + zebra_snippets_append(snippets, seqno, ord, dst_term); while (*src++) ; off = src - reckeys->buf; nmem_reset(nmem); } - nmem_destroy(nmem); iscz1_stop(decode_handle); +#endif + nmem_destroy(nmem); return ZEBRA_OK; } -void print_rec_keys(ZebraHandle zh, struct recKeys *reckeys) +void print_rec_keys(ZebraHandle zh, +#if NEW_REC_KEYS + zebra_rec_keys_t reckeys +#else + struct recKeys *reckeys +#endif +) { - void *decode_handle = iscz1_start(); +#if NEW_REC_KEYS + yaz_log(YLOG_LOG, "print_rec_keys"); + if (zebra_rec_keys_rewind(reckeys)) + { + const char *str; + size_t slen; + struct it_key key; + while (zebra_rec_keys_read(reckeys, &str, &slen, &key)) + { + char dst_buf[IT_MAX_WORD]; + int seqno; + int index_type; + const char *db = 0; + assert(key.len <= 4 && key.len > 2); + + zebraExplain_lookup_ord(zh->reg->zei, + key.mem[0], &index_type, &db, 0, 0); + + seqno = (int) key.mem[key.len-1]; + + zebra_term_untrans(zh, index_type, dst_buf, str); + + yaz_log(YLOG_LOG, "ord=" ZINT_FORMAT " seqno=%d term=%s", + key.mem[0], seqno, dst_buf); + } + } +#else int off = 0; int seqno = 0; - NMEM nmem = 
nmem_create(); - + void *decode_handle = iscz1_start(); yaz_log(YLOG_LOG, "print_rec_keys buf=%p sz=%d", reckeys->buf, reckeys->buf_used); assert(reckeys->buf); @@ -1548,28 +1955,28 @@ void print_rec_keys(ZebraHandle zh, struct recKeys *reckeys) const char *src = reckeys->buf + off; struct it_key key; char *dst = (char*) &key; - int attrSet, attrUse; char dst_buf[IT_MAX_WORD]; - char *dst_term = dst_buf; + int index_type; + const char *db = 0; iscz1_decode(decode_handle, &dst, &src); assert(key.len <= 4 && key.len > 2); - attrSet = (int) key.mem[0] >> 16; - attrUse = (int) key.mem[0] & 65535; seqno = (int) key.mem[key.len-1]; + + zebraExplain_lookup_ord(zh->reg->zei, + key.mem[0], &index_type, &db, 0, 0); - zebra_term_untrans_iconv(zh, nmem, src[0], &dst_term, src+1); + zebra_term_untrans(zh, index_type, dst_buf, src); yaz_log(YLOG_LOG, "ord=" ZINT_FORMAT " seqno=%d term=%s", - key.mem[0], seqno, dst_term); + key.mem[0], seqno, dst_buf); while (*src++) ; off = src - reckeys->buf; - nmem_reset(nmem); } - nmem_destroy(nmem); iscz1_stop(decode_handle); +#endif } void extract_add_index_string (RecWord *p, const char *str, int length) @@ -1658,9 +2065,9 @@ static void extract_add_sort_string (RecWord *p, const char *str, int length) { int set, use, slen; - off += key_SU_decode(&set, sk->buf + off); - off += key_SU_decode(&use, sk->buf + off); - off += key_SU_decode(&slen, sk->buf + off); + off += key_SU_decode(&set, (unsigned char *) sk->buf + off); + off += key_SU_decode(&use, (unsigned char *) sk->buf + off); + off += key_SU_decode(&slen, (unsigned char *) sk->buf + off); off += slen; if (p->attrSet == set && p->attrUse == use) return; @@ -1904,9 +2311,9 @@ void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, { int set, use, slen; - off += key_SU_decode(&set, sk->buf + off); - off += key_SU_decode(&use, sk->buf + off); - off += key_SU_decode(&slen, sk->buf + off); + off += key_SU_decode(&set, (unsigned char *) sk->buf + off); + off += key_SU_decode(&use, (unsigned 
char *) sk->buf + off); + off += key_SU_decode(&slen, (unsigned char *) sk->buf + off); sortIdx_type(sortIdx, use); if (cmd == 1)