X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=9d2742c928eb35ce7195ba94fe2bd4742a26bff1;hb=3c37091eb3e508bff58b297eaa0feab5e92ff53b;hp=836b6de7a984e6de3cb25495b7e8e1b32a780b8d;hpb=593927cb1897c1e3163c284448eff7fee6ddad51;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 836b6de..9d2742c 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.196 2005-10-28 07:25:30 adam Exp $ +/* $Id: extract.c,v 1.204 2006-03-20 15:17:30 mike Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -32,6 +32,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include "index.h" +#include "orddict.h" #include #include @@ -99,21 +100,10 @@ static void extract_init (struct recExtractCtrl *p, RecWord *w) } static void searchRecordKey(ZebraHandle zh, -#if NEW_REC_KEYS zebra_rec_keys_t reckeys, -#else - const struct recKeys *reckeys, -#endif int attrSetS, int attrUseS, const char **ws, int ws_length) { -#if NEW_REC_KEYS -#else - void *decode_handle = iscz1_start(); - int off = 0; - int startSeq = -1; - int seqno = 0; -#endif int i; int ch; @@ -125,7 +115,6 @@ static void searchRecordKey(ZebraHandle zh, if (ch < 0) return ; -#if NEW_REC_KEYS if (zebra_rec_keys_rewind(reckeys)) { int startSeq = -1; @@ -151,36 +140,6 @@ static void searchRecordKey(ZebraHandle zh, } } } -#else - while (off < reckeys->buf_used) - { - const char *src = reckeys->buf + off; - struct it_key key; - char *dst = (char*) &key; - - iscz1_decode(decode_handle, &dst, &src); - assert(key.len <= 4 && key.len > 2); - - seqno = (int) key.mem[key.len-1]; - - if (key.mem[0] == ch) - { - int woff; - - if (startSeq == -1) - startSeq = seqno; - woff = seqno - startSeq; - if (woff >= 0 && woff < ws_length) - ws[woff] = src; - } - - while (*src++) - ; - off = src - reckeys->buf; - } - iscz1_stop(decode_handle); - assert (off == reckeys->buf_used); -#endif } struct file_read_info { @@ -248,12 +207,10 @@ static void file_end (void *handle, off_t offset) } } +#define FILE_MATCH_BLANK "\t " + static char *fileMatchStr (ZebraHandle zh, -#if NEW_REC_KEYS zebra_rec_keys_t reckeys, -#else - struct recKeys *reckeys, -#endif const char *fname, const char *spec) { static char dstBuf[2048]; /* static here ??? */ @@ -262,8 +219,8 @@ static char *fileMatchStr (ZebraHandle zh, while (1) { - while (*s == ' ' || *s == '\t') - s++; + for (; *s && strchr(FILE_MATCH_BLANK, *s); s++) + ; if (!*s) break; if (*s == '(') @@ -272,21 +229,26 @@ static char *fileMatchStr (ZebraHandle zh, char attset_str[64], attname_str[64]; data1_attset *attset; int i; - char matchFlag[32]; int attSet = 1, attUse = 1; int first = 1; - - s++; - for (i = 0; *s && *s != ',' && *s != ')'; s++) - if (i < 63) + + for (s++; strchr(FILE_MATCH_BLANK, *s); s++) + ; + for (i = 0; *s && *s != ',' && *s != ')' && + !strchr(FILE_MATCH_BLANK, *s); s++) + if (i+1 < sizeof(attset_str)) attset_str[i++] = *s; attset_str[i] = '\0'; - + + for (; strchr(FILE_MATCH_BLANK, *s); s++) + ; if (*s == ',') { - s++; - for (i = 0; *s && *s != ')'; s++) - if (i < 63) + for (s++; strchr(FILE_MATCH_BLANK, *s); s++) + ; + for (i = 0; *s && *s != ')' && + !strchr(FILE_MATCH_BLANK, *s); s++) + if (i+1 < sizeof(attname_str)) attname_str[i++] = *s; attname_str[i] = '\0'; } @@ -303,12 +265,7 @@ static char *fileMatchStr (ZebraHandle zh, } searchRecordKey (zh, reckeys, attSet, attUse, ws, 32); - if (*s == ')') - { - for (i = 0; i<32; i++) - matchFlag[i] = 1; - } - else + if (*s != ')') { yaz_log (YLOG_WARN, "Missing ) in match criteria %s in group %s", spec, zh->m_group ? zh->m_group : "none"); @@ -317,7 +274,7 @@ static char *fileMatchStr (ZebraHandle zh, s++; for (i = 0; i<32; i++) - if (matchFlag[i] && ws[i]) + if (ws[i]) { if (first) { @@ -340,12 +297,12 @@ static char *fileMatchStr (ZebraHandle zh, char special[64]; const char *spec_src = NULL; const char *s1 = ++s; - while (*s1 && *s1 != ' ' && *s1 != '\t') + while (*s1 && !strchr(FILE_MATCH_BLANK, *s1)) s1++; spec_len = s1 - s; - if (spec_len > 63) - spec_len = 63; + if (spec_len > sizeof(special)-1) + spec_len = sizeof(special)-1; memcpy (special, s, spec_len); special[spec_len] = '\0'; s = s1; @@ -375,7 +332,7 @@ static char *fileMatchStr (ZebraHandle zh, while (*s && *s != stopMarker) { - if (i < 63) + if (i+1 < sizeof(tmpString)) tmpString[i++] = *s++; } if (*s) @@ -408,15 +365,6 @@ struct recordLogInfo { struct recordGroup *rGroup; }; -#if NEW_REC_KEYS -#else -void create_rec_keys_codec(struct recKeys *keys) -{ - keys->buf_used = 0; - iscz1_reset(keys->codec_handle); -} -#endif - static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl) { int i; @@ -458,19 +406,11 @@ static int file_extract_record(ZebraHandle zh, if (fi->fd != -1) { /* we are going to read from a file, so prepare the extraction */ -#if NEW_REC_KEYS zebra_rec_keys_reset(zh->reg->keys); -#else - create_rec_keys_codec(&zh->reg->keys); -#endif #if NATTR -#if NEW_REC_KEYS zebra_rec_keys_reset(zh->reg->sortKeys); #else - create_rec_keys_codec(&zh->reg->sortKeys); -#endif -#else zh->reg->sortKeys.buf_used = 0; #endif recordOffset = fi->file_moffset; @@ -543,13 +483,8 @@ static int file_extract_record(ZebraHandle zh, if (matchStr == 0 && zh->m_record_id && *zh->m_record_id) { -#if NEW_REC_KEYS matchStr = fileMatchStr (zh, zh->reg->keys, fname, zh->m_record_id); -#else - matchStr = fileMatchStr (zh, &zh->reg->keys, fname, - zh->m_record_id); -#endif if (!matchStr) { yaz_log(YLOG_WARN, "Bad match criteria"); @@ -558,7 +493,9 @@ static int file_extract_record(ZebraHandle zh, } if (matchStr) { - char *rinfo = dict_lookup (zh->reg->matchDict, matchStr); + int db_ord = zebraExplain_get_database_ord(zh->reg->zei); + char *rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord, + matchStr); if (rinfo) { assert(*rinfo == sizeof(*sysno)); @@ -566,13 +503,7 @@ static int file_extract_record(ZebraHandle zh, } } } - if (! *sysno -#if NEW_REC_KEYS - && zebra_rec_keys_empty(zh->reg->keys) -#else - && zh->reg->keys.buf_used == 0 -#endif - ) + if (! *sysno && zebra_rec_keys_empty(zh->reg->keys) ) { /* the extraction process returned no information - the record is probably empty - unless flagShowRecords is in use */ @@ -607,34 +538,27 @@ static int file_extract_record(ZebraHandle zh, if (matchStr) { - dict_insert (zh->reg->matchDict, matchStr, sizeof(*sysno), sysno); + int db_ord = zebraExplain_get_database_ord(zh->reg->zei); + dict_insert_ord(zh->reg->matchDict, db_ord, matchStr, + sizeof(*sysno), sysno); } +#if NATTR + extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); +#else extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); -#if NEW_REC_KEYS +#endif extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); -#else - extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys, - recordAttr->staticrank); -#endif zh->records_inserted++; } else { /* record already exists */ -#if NEW_REC_KEYS zebra_rec_keys_t delkeys = zebra_rec_keys_open(); -#else - struct recKeys delkeys; -#endif #if NATTR -#if NEW_REC_KEYS zebra_rec_keys_t sortKeys = zebra_rec_keys_open(); #else - struct recKeys sortKeys; -#endif -#else struct sortKeys sortKeys; #endif @@ -643,18 +567,12 @@ static int file_extract_record(ZebraHandle zh, recordAttr = rec_init_attr (zh->reg->zei, rec); -#if NEW_REC_KEYS zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], 0); -#else - /* flush old keys for sort&search etc. */ - delkeys.buf_used = rec->size[recInfo_delKeys]; - delkeys.buf = rec->info[recInfo_delKeys]; -#endif -#if NEW_REC_KEYS && NATTR +#if NATTR zebra_rec_keys_set_buf(sortKeys, rec->info[recInfo_sortKeys], rec->size[recInfo_sortKeys], @@ -666,27 +584,16 @@ static int file_extract_record(ZebraHandle zh, extract_flushSortKeys (zh, *sysno, 0, &sortKeys); #endif -#if NEW_REC_KEYS extract_flushRecordKeys (zh, *sysno, 0, delkeys, recordAttr->staticrank); /* old values */ -#else - extract_flushRecordKeys (zh, *sysno, 0, &delkeys, - recordAttr->staticrank); /* old values */ -#endif if (deleteFlag) { /* record going to be deleted */ - if ( -#if NEW_REC_KEYS - zebra_rec_keys_empty(delkeys) -#else - !delkeys.buf_used -#endif - ) + if (zebra_rec_keys_empty(delkeys)) { yaz_log (YLOG_LOG, "delete %s %s " PRINTF_OFF_T, zh->m_record_type, fname, recordOffset); - yaz_log (YLOG_WARN, "cannot delete file above, storeKeys false"); + yaz_log (YLOG_WARN, "cannot delete file above, storeKeys false (1)"); } else { @@ -695,7 +602,10 @@ static int file_extract_record(ZebraHandle zh, zh->m_record_type, fname, recordOffset); zh->records_deleted++; if (matchStr) - dict_delete (zh->reg->matchDict, matchStr); + { + int db_ord = zebraExplain_get_database_ord(zh->reg->zei); + dict_delete_ord(zh->reg->matchDict, db_ord, matchStr); + } rec_del (zh->reg->records, &rec); } rec_rm (&rec); @@ -704,41 +614,22 @@ static int file_extract_record(ZebraHandle zh, } else { - /* record going to be updated */ - if ( -#if NEW_REC_KEYS - zebra_rec_keys_empty(delkeys) -#else - !delkeys.buf_used -#endif - ) - { + /* flush new keys for sort&search etc */ + if (zh->records_processed < zh->m_file_verbose_limit) yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T, zh->m_record_type, fname, recordOffset); - yaz_log (YLOG_WARN, "cannot update file above, storeKeys false"); - } - else - { - /* flush new keys for sort&search etc */ - if (zh->records_processed < zh->m_file_verbose_limit) - yaz_log (YLOG_LOG, "update %s %s " PRINTF_OFF_T, - zh->m_record_type, fname, recordOffset); - recordAttr->staticrank = extractCtrl.staticrank; - extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); -#if NEW_REC_KEYS - extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, - recordAttr->staticrank); + recordAttr->staticrank = extractCtrl.staticrank; +#if NATTR + extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); #else - extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys, - recordAttr->staticrank); + extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); #endif - zh->records_updated++; - } + extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, + recordAttr->staticrank); + zh->records_updated++; } -#if NEW_REC_KEYS zebra_rec_keys_close(delkeys); -#endif -#if NATTR && NEW_REC_KWYS +#if NATTR zebra_rec_keys_close(sortKeys); #endif } @@ -754,25 +645,11 @@ static int file_extract_record(ZebraHandle zh, /* update delete keys */ xfree (rec->info[recInfo_delKeys]); - if ( -#if NEW_REC_KEYS - !zebra_rec_keys_empty(zh->reg->keys) -#else - zh->reg->keys.buf_used > 0 -#endif - && zh->m_store_keys == 1) + if (!zebra_rec_keys_empty(zh->reg->keys) && zh->m_store_keys == 1) { -#if NEW_REC_KEYS zebra_rec_keys_get_buf(zh->reg->keys, &rec->info[recInfo_delKeys], &rec->size[recInfo_delKeys]); -#else - - rec->size[recInfo_delKeys] = zh->reg->keys.buf_used; - rec->info[recInfo_delKeys] = zh->reg->keys.buf; - zh->reg->keys.buf = NULL; - zh->reg->keys.buf_max = 0; -#endif } else { @@ -783,10 +660,16 @@ static int file_extract_record(ZebraHandle zh, /* update sort keys */ xfree (rec->info[recInfo_sortKeys]); +#if NATTR + zebra_rec_keys_get_buf(zh->reg->sortKeys, + &rec->info[recInfo_sortKeys], + &rec->size[recInfo_sortKeys]); +#else rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; zh->reg->sortKeys.buf = NULL; zh->reg->sortKeys.buf_max = 0; +#endif /* save file size of original record */ zebraExplain_recordBytesIncrement (zh->reg->zei, @@ -972,6 +855,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, int force_update, int allow_update) { + SYSNO sysno0 = 0; RecordAttr *recordAttr; struct recExtractCtrl extractCtrl; int r; @@ -1002,18 +886,11 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, extractCtrl.first_record = 1; extractCtrl.fh = &fc; -#if NEW_REC_KEYS zebra_rec_keys_reset(zh->reg->keys); -#else - create_rec_keys_codec(&zh->reg->keys); -#endif + #if NATTR -#if NEW_REC_KEYS zebra_rec_keys_reset(zh->reg->sortKeys); #else - create_rec_keys_codec(&zh->reg->sortKeys); -#endif -#else zh->reg->sortKeys.buf_used = 0; #endif if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0])) @@ -1076,25 +953,20 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, yaz_log (YLOG_WARN, "extract error: no such filter"); return ZEBRA_FAIL; } - /* match criteria */ - matchStr = NULL; if (extractCtrl.match_criteria[0]) match_criteria = extractCtrl.match_criteria; - if (! *sysno) { - char *rinfo; + if (!sysno) { + + sysno = &sysno0; + if (match_criteria && *match_criteria) { matchStr = match_criteria; } else { if (zh->m_record_id && *zh->m_record_id) { -#if NEW_REC_KEYS matchStr = fileMatchStr (zh, zh->reg->keys, pr_fname, zh->m_record_id); -#else - matchStr = fileMatchStr (zh, &zh->reg->keys, pr_fname, - zh->m_record_id); -#endif if (!matchStr) { yaz_log (YLOG_WARN, "Bad match criteria (recordID)"); @@ -1102,8 +974,11 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, } } } - if (matchStr) { - rinfo = dict_lookup (zh->reg->matchDict, matchStr); + if (matchStr) + { + int db_ord = zebraExplain_get_database_ord(zh->reg->zei); + char *rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord, + matchStr); if (rinfo) { assert(*rinfo == sizeof(*sysno)); @@ -1111,13 +986,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, } } } - if ( -#if NEW_REC_KEYS - zebra_rec_keys_empty(zh->reg->keys) -#else - zh->reg->keys.buf_used == 0 -#endif -) + if (zebra_rec_keys_empty(zh->reg->keys)) { /* the extraction process returned no information - the record is probably empty - unless flagShowRecords is in use */ @@ -1130,8 +999,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, /* new record */ if (delete_flag) { - if (show_progress) - yaz_log (YLOG_LOG, "delete %s %s %ld", recordType, + yaz_log (YLOG_LOG, "delete %s %s %ld", recordType, pr_fname, (long) recordOffset); yaz_log (YLOG_WARN, "cannot delete record above (seems new)"); return ZEBRA_FAIL; @@ -1148,45 +1016,36 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, if (matchStr) { - dict_insert (zh->reg->matchDict, matchStr, - sizeof(*sysno), sysno); + int db_ord = zebraExplain_get_database_ord(zh->reg->zei); + dict_insert_ord(zh->reg->matchDict, db_ord, matchStr, + sizeof(*sysno), sysno); } +#if NATTR + extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); +#else extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); +#endif -#if NEW_REC_KEYS #if 0 print_rec_keys(zh, zh->reg->keys); #endif extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); -#else -#if 0 - print_rec_keys(zh, &zh->reg->keys); -#endif - extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys, - recordAttr->staticrank); -#endif - zh->records_inserted++; } else { /* record already exists */ -#if NEW_REC_KEYS zebra_rec_keys_t delkeys = zebra_rec_keys_open(); -#else - struct recKeys delkeys; -#endif #if NATTR - struct recKeys sortKeys; + zebra_rec_keys_t sortKeys = zebra_rec_keys_open(); #else struct sortKeys sortKeys; #endif if (!allow_update) { - if (show_progress) - yaz_log (YLOG_LOG, "skipped %s %s %ld", + yaz_log (YLOG_LOG, "skipped %s %s %ld", recordType, pr_fname, (long) recordOffset); logRecord(zh); return ZEBRA_FAIL; @@ -1197,48 +1056,36 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, recordAttr = rec_init_attr (zh->reg->zei, rec); -#if NEW_REC_KEYS zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], 0); +#if NATTR + zebra_rec_keys_set_buf(sortKeys, + rec->info[recInfo_sortKeys], + rec->size[recInfo_sortKeys], + 0); #else - delkeys.buf_used = rec->size[recInfo_delKeys]; - delkeys.buf = rec->info[recInfo_delKeys]; -#endif sortKeys.buf_used = rec->size[recInfo_sortKeys]; sortKeys.buf = rec->info[recInfo_sortKeys]; +#endif -#if NEW_REC_KEYS && NATTR +#if NATTR extract_flushSortKeys (zh, *sysno, 0, sortKeys); #else extract_flushSortKeys (zh, *sysno, 0, &sortKeys); #endif -#if NEW_REC_KEYS extract_flushRecordKeys (zh, *sysno, 0, delkeys, recordAttr->staticrank); -#else - extract_flushRecordKeys (zh, *sysno, 0, &delkeys, - recordAttr->staticrank); -#endif if (delete_flag) { /* record going to be deleted */ - if ( -#if NEW_REC_KEYS - zebra_rec_keys_empty(delkeys) -#else - !delkeys.buf_used -#endif - ) + if (zebra_rec_keys_empty(delkeys)) { - if (show_progress) - { - yaz_log (YLOG_LOG, "delete %s %s %ld", recordType, - pr_fname, (long) recordOffset); - yaz_log (YLOG_WARN, "cannot delete file above, " - "storeKeys false"); - } + yaz_log (YLOG_LOG, "delete %s %s %ld", recordType, + pr_fname, (long) recordOffset); + yaz_log (YLOG_WARN, "cannot delete file above, " + "storeKeys false (3)"); } else { @@ -1247,7 +1094,10 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, pr_fname, (long) recordOffset); zh->records_deleted++; if (matchStr) - dict_delete (zh->reg->matchDict, matchStr); + { + int db_ord = zebraExplain_get_database_ord(zh->reg->zei); + dict_delete_ord(zh->reg->matchDict, db_ord, matchStr); + } rec_del (zh->reg->records, &rec); } rec_rm (&rec); @@ -1256,43 +1106,21 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, } else { - /* record going to be updated */ - if ( -#if NEW_REC_KEYS - zebra_rec_keys_empty(delkeys) -#else - !delkeys.buf_used -#endif - ) - { - if (show_progress) - { - yaz_log (YLOG_LOG, "update %s %s %ld", recordType, - pr_fname, (long) recordOffset); - yaz_log (YLOG_WARN, "cannot update file above, storeKeys false"); - } - } - else - { - if (show_progress) + if (show_progress) yaz_log (YLOG_LOG, "update %s %s %ld", recordType, pr_fname, (long) recordOffset); - recordAttr->staticrank = extractCtrl.staticrank; - extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); -#if NEW_REC_KEYS - extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, - recordAttr->staticrank); + recordAttr->staticrank = extractCtrl.staticrank; +#if NATTR + extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); #else - extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys, - recordAttr->staticrank); + extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); #endif - zh->records_updated++; - } + extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, + recordAttr->staticrank); + zh->records_updated++; } -#if NEW_REC_KEYS zebra_rec_keys_close(delkeys); -#endif -#if NEW_REC_KEYS && NATTR +#if NATTR zebra_rec_keys_close(sortKeys); #endif } @@ -1308,25 +1136,11 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, /* update delete keys */ xfree (rec->info[recInfo_delKeys]); - if ( -#if NEW_REC_KEYS - !zebra_rec_keys_empty(zh->reg->keys) -#else - zh->reg->keys.buf_used > 0 -#endif - && zh->m_store_keys == 1) + if (!zebra_rec_keys_empty(zh->reg->keys) && zh->m_store_keys == 1) { -#if NEW_REC_KEYS zebra_rec_keys_get_buf(zh->reg->keys, &rec->info[recInfo_delKeys], &rec->size[recInfo_delKeys]); -#else - - rec->size[recInfo_delKeys] = zh->reg->keys.buf_used; - rec->info[recInfo_delKeys] = zh->reg->keys.buf; - zh->reg->keys.buf = NULL; - zh->reg->keys.buf_max = 0; -#endif } else { @@ -1336,10 +1150,16 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, /* update sort keys */ xfree (rec->info[recInfo_sortKeys]); +#if NATTR + zebra_rec_keys_get_buf(zh->reg->sortKeys, + &rec->info[recInfo_sortKeys], + &rec->size[recInfo_sortKeys]); +#else rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; zh->reg->sortKeys.buf = NULL; zh->reg->sortKeys.buf_max = 0; +#endif /* save file size of original record */ zebraExplain_recordBytesIncrement (zh->reg->zei, @@ -1408,19 +1228,11 @@ int explain_extract (void *handle, Record rec, data1_node *n) abort (); } -#if NEW_REC_KEYS zebra_rec_keys_reset(zh->reg->keys); -#else - create_rec_keys_codec(&zh->reg->keys); -#endif #if NATTR -#if NEW_REC_KEYS zebra_rec_keys_reset(zh->reg->sortKeys); #else - create_rec_keys_codec(&zh->reg->sortKeys); -#endif -#else zh->reg->sortKeys.buf_used = 0; #endif extractCtrl.init = extract_init; @@ -1443,34 +1255,20 @@ int explain_extract (void *handle, Record rec, data1_node *n) if (rec->size[recInfo_delKeys]) { -#if NEW_REC_KEYS zebra_rec_keys_t delkeys = zebra_rec_keys_open(); -#else - struct recKeys delkeys; -#endif - + #if NATTR -#if NEW_REC_KEYS - zebra_rec_keys_t sortkeys = zzebra_rec_keys_open(); -#else - struct recKeys sortkeys; -#endif + zebra_rec_keys_t sortkeys = zebra_rec_keys_open(); #else struct sortKeys sortkeys; #endif -#if NEW_REC_KEYS zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], 0); extract_flushRecordKeys (zh, rec->sysno, 0, delkeys, 0); zebra_rec_keys_close(delkeys); -#else - delkeys.buf_used = rec->size[recInfo_delKeys]; - delkeys.buf = rec->info[recInfo_delKeys]; - extract_flushRecordKeys (zh, rec->sysno, 0, &delkeys, 0); -#endif -#if NATTR && NEW_REC_KEYS +#if NATTR zebra_rec_keys_set_buf(sortkeys, rec->info[recInfo_sortKeys], rec->size[recInfo_sortKeys], 0); @@ -1483,53 +1281,38 @@ int explain_extract (void *handle, Record rec, data1_node *n) extract_flushSortKeys (zh, rec->sysno, 0, &sortkeys); #endif } -#if NEW_REC_KEYS extract_flushRecordKeys (zh, rec->sysno, 1, zh->reg->keys, 0); -#else - extract_flushRecordKeys (zh, rec->sysno, 1, &zh->reg->keys, 0); -#endif -#if NATTR && NEW_REC_KEYS +#if NATTR extract_flushSortKeys (zh, rec->sysno, 1, zh->reg->sortKeys); #else extract_flushSortKeys (zh, rec->sysno, 1, &zh->reg->sortKeys); #endif xfree (rec->info[recInfo_delKeys]); -#if NEW_REC_KEYS zebra_rec_keys_get_buf(zh->reg->keys, &rec->info[recInfo_delKeys], &rec->size[recInfo_delKeys]); -#else - rec->size[recInfo_delKeys] = zh->reg->keys.buf_used; - rec->info[recInfo_delKeys] = zh->reg->keys.buf; - zh->reg->keys.buf = NULL; - zh->reg->keys.buf_max = 0; -#endif xfree (rec->info[recInfo_sortKeys]); +#if NATTR + zebra_rec_keys_get_buf(zh->reg->sortKeys, + &rec->info[recInfo_sortKeys], + &rec->size[recInfo_sortKeys]); +#else rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; zh->reg->sortKeys.buf = NULL; zh->reg->sortKeys.buf_max = 0; +#endif return 0; } void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, int cmd, -#if NEW_REC_KEYS zebra_rec_keys_t reckeys, -#else - struct recKeys *reckeys, -#endif zint staticrank) { -#if NEW_REC_KEYS -#else - void *decode_handle = iscz1_start(); - int off = 0; - int ch = 0; -#endif ZebraExplainInfo zei = zh->reg->zei; if (!zh->reg->key_buf) @@ -1550,7 +1333,6 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, } zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1); -#if NEW_REC_KEYS if (zebra_rec_keys_rewind(reckeys)) { size_t slen; @@ -1590,6 +1372,12 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, if (zh->m_staticrank) /* rank config enabled ? */ { + if (staticrank < 0) + { + yaz_log(YLOG_WARN, "staticrank = %ld. Setting to 0", + (long) staticrank); + staticrank = 0; + } *keyp++ = staticrank; key_out.len = 4; } @@ -1608,60 +1396,6 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, (zh->reg->key_buf_used) += sizeof(key_out); } } -#else - while (off < reckeys->buf_used) - { - const char *src = reckeys->buf + off; - struct it_key key_in; - struct it_key key_out; - char *dst = (char*) &key_in; - zint *keyp = key_out.mem; - - iscz1_decode(decode_handle, &dst, &src); - assert(key_in.len == 4); - - if (zh->reg->key_buf_used + 1024 > - (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*)) - extract_flushWriteKeys(zh, 0); - ++(zh->reg->ptr_i); - assert(zh->reg->ptr_i > 0); - (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] = - (char*)zh->reg->key_buf + zh->reg->key_buf_used; - - ch = (int) key_in.mem[0]; /* ordinal for field/use/attribute */ - - zh->reg->key_buf_used += - key_SU_encode(ch, ((char*)zh->reg->key_buf) + - zh->reg->key_buf_used); - while (*src) - ((char*)zh->reg->key_buf) [(zh->reg->key_buf_used)++] = *src++; - src++; - ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = '\0'; - ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = cmd; - - if (zh->m_staticrank) /* rank config enabled ? */ - { - *keyp++ = staticrank; - key_out.len = 4; - } - else - key_out.len = 3; - - if (key_in.mem[1]) /* filter specified record ID */ - *keyp++ = key_in.mem[1]; - else - *keyp++ = sysno; - *keyp++ = key_in.mem[2]; /* section_id */ - *keyp++ = key_in.mem[3]; /* sequence .. */ - - memcpy ((char*)zh->reg->key_buf + zh->reg->key_buf_used, - &key_out, sizeof(key_out)); - (zh->reg->key_buf_used) += sizeof(key_out); - off = src - reckeys->buf; - } - assert (off == reckeys->buf_used); - iscz1_stop(decode_handle); -#endif } void extract_flushWriteKeys (ZebraHandle zh, int final) @@ -1798,56 +1532,11 @@ void extract_flushWriteKeys (ZebraHandle zh, int final) zh->reg->key_buf_used = 0; } -#if NEW_REC_KEYS -void extract_add_it_key (ZebraHandle zh, - zebra_rec_keys_t *keys, - int reg_type, - const char *str, int slen, struct it_key *key) -{ - zebra_rec_keys_write(*keys, reg_type, str, slen, key); -} -#else -void extract_add_it_key (ZebraHandle zh, - struct recKeys *keys, - int reg_type, - const char *str, int slen, struct it_key *key) -{ - char *dst; - const char *src = (char*) key; - - if (keys->buf_used+1024 > keys->buf_max) - { - char *b = (char *) xmalloc (keys->buf_max += 128000); - if (keys->buf_used > 0) - memcpy (b, keys->buf, keys->buf_used); - xfree (keys->buf); - keys->buf = b; - } - dst = keys->buf + keys->buf_used; - - iscz1_encode(keys->codec_handle, &dst, &src); - -#if REG_TYPE_PREFIX - *dst++ = reg_type; -#endif - memcpy (dst, str, slen); - dst += slen; - *dst++ = '\0'; - keys->buf_used = dst - keys->buf; -} -#endif - ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, -#if NEW_REC_KEYS zebra_rec_keys_t reckeys, -#else - struct recKeys *reckeys, -#endif zebra_snippets *snippets) { NMEM nmem = nmem_create(); - -#if NEW_REC_KEYS if (zebra_rec_keys_rewind(reckeys)) { const char *str; @@ -1872,52 +1561,12 @@ ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, nmem_reset(nmem); } } -#else - int off = 0; - int seqno = 0; - void *decode_handle = iscz1_start(); - assert(reckeys->buf); - while (off < reckeys->buf_used) - { - const char *src = reckeys->buf + off; - struct it_key key; - char *dst = (char*) &key; - char dst_buf[IT_MAX_WORD]; - char *dst_term = dst_buf; - int index_type = 0, ord; - - iscz1_decode(decode_handle, &dst, &src); - assert(key.len <= 4 && key.len > 2); - - seqno = (int) key.mem[key.len-1]; - ord = key.mem[0]; - - zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, - 0/* db */, 0/* set */, 0/* use */); - assert(index_type); - zebra_term_untrans_iconv(zh, nmem, index_type, - &dst_term, src); - zebra_snippets_append(snippets, seqno, ord, dst_term); - while (*src++) - ; - off = src - reckeys->buf; - nmem_reset(nmem); - } - iscz1_stop(decode_handle); -#endif nmem_destroy(nmem); return ZEBRA_OK; } -void print_rec_keys(ZebraHandle zh, -#if NEW_REC_KEYS - zebra_rec_keys_t reckeys -#else - struct recKeys *reckeys -#endif -) +void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys) { -#if NEW_REC_KEYS yaz_log(YLOG_LOG, "print_rec_keys"); if (zebra_rec_keys_rewind(reckeys)) { @@ -1943,40 +1592,6 @@ void print_rec_keys(ZebraHandle zh, key.mem[0], seqno, dst_buf); } } -#else - int off = 0; - int seqno = 0; - void *decode_handle = iscz1_start(); - yaz_log(YLOG_LOG, "print_rec_keys buf=%p sz=%d", reckeys->buf, - reckeys->buf_used); - assert(reckeys->buf); - while (off < reckeys->buf_used) - { - const char *src = reckeys->buf + off; - struct it_key key; - char *dst = (char*) &key; - char dst_buf[IT_MAX_WORD]; - int index_type; - const char *db = 0; - - iscz1_decode(decode_handle, &dst, &src); - assert(key.len <= 4 && key.len > 2); - - seqno = (int) key.mem[key.len-1]; - - zebraExplain_lookup_ord(zh->reg->zei, - key.mem[0], &index_type, &db, 0, 0); - - zebra_term_untrans(zh, index_type, dst_buf, src); - - yaz_log(YLOG_LOG, "ord=" ZINT_FORMAT " seqno=%d term=%s", - key.mem[0], seqno, dst_buf); - while (*src++) - ; - off = src - reckeys->buf; - } - iscz1_stop(decode_handle); -#endif } void extract_add_index_string (RecWord *p, const char *str, int length) @@ -2012,16 +1627,23 @@ void extract_add_index_string (RecWord *p, const char *str, int length) key.mem[3] = p->seqno; #if 0 - /* just for debugging .. */ - yaz_log(YLOG_LOG, "add: set=%d use=%d " - "record_id=%lld section_id=%lld seqno=%lld", - p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno); + if (1) + { + char strz[80]; + int i; + + strz[0] = 0; + for (i = 0; iattrSet, p->attrUse, p->record_id, p->section_id, p->seqno, + strz); + } #endif - extract_add_it_key(p->extractCtrl->handle, - &zh->reg->keys, - p->index_type, str, - length, &key); + zebra_rec_keys_write(zh->reg->keys, str, length, &key); } #if NATTR @@ -2049,10 +1671,7 @@ static void extract_add_sort_string (RecWord *p, const char *str, int length) key.mem[2] = p->section_id; key.mem[3] = p->seqno; - extract_add_it_key(p->extractCtrl->handle, - &zh->reg->sortKeys, - p->index_type, str, - length, &key); + zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key); } #else static void extract_add_sort_string (RecWord *p, const char *str, int length) @@ -2265,6 +1884,7 @@ void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid) } #if NATTR +#error not done yet with zebra_rec_keys_t void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, int cmd, struct recKeys *reckeys) { @@ -2335,6 +1955,7 @@ void encode_key_init (struct encode_info *i) i->prevcmd=-1; i->keylen=0; i->encode_handle = iscz1_start(); + i->decode_handle = iscz1_start(); } #define OLDENCODE 1 @@ -2356,19 +1977,42 @@ void encode_key_write (char *k, struct encode_info *i, FILE *outf) /* and copy & align key so we can mangle */ memcpy (&key, k+1, sizeof(struct it_key)); /* *k is insert/delete */ +#if 0 + /* debugging */ + key_logdump_txt(YLOG_LOG, &key, *k ? "i" : "d"); +#endif + assert(key.mem[0] >= 0); + bp0 = bp++; iscz1_encode(i->encode_handle, &bp, &src); + *bp0 = (*k * 128) + bp - bp0 - 1; /* length and insert/delete combined */ if (fwrite (i->buf, bp - i->buf, 1, outf) != 1) { yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite"); exit (1); } + +#if 0 + /* debugging */ + if (1) + { + struct it_key key2; + const char *src = bp0+1; + char *dst = (char*) &key2; + iscz1_decode(i->decode_handle, &dst, &src); + + key_logdump_txt(YLOG_LOG, &key2, *k ? "i" : "d"); + + assert(key2.mem[1]); + } +#endif } void encode_key_flush (struct encode_info *i, FILE *outf) -{ /* dummy routine */ +{ iscz1_stop(i->encode_handle); + iscz1_stop(i->decode_handle); } #else