X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=a980bcf26b0fede3d9ba9f7da960bb4100fd773c;hb=27742a4ea82e9b3494c166203b06d1d7c48da923;hp=3f218627ae218be4b17f323d3b2af6ed05288958;hpb=ecb3935e78cd9bcfdebafdee0834cfb1060d7b5e;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 3f21862..a980bcf 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,5 +1,5 @@ -/* $Id: extract.c,v 1.209 2006-05-10 08:13:21 adam Exp $ - Copyright (C) 1995-2005 +/* $Id: extract.c,v 1.218 2006-05-30 13:44:44 adam Exp $ + Copyright (C) 1995-2006 Index Data ApS This file is part of the Zebra server. @@ -36,12 +36,38 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include -#if _FILE_OFFSET_BITS == 64 -#define PRINTF_OFF_T "%Ld" +#ifdef WIN32 +#define PRINTF_OFF_T "%I64d" +#else +/* !WIN32 */ +#if SIZEOF_OFF_T == SIZEOF_LONG_LONG +#define PRINTF_OFF_T "%lld" #else #define PRINTF_OFF_T "%ld" #endif +#endif + +#define ENCODE_BUFLEN 768 +struct encode_info { +#if 0 + int sysno; /* previously written values for delta-compress */ + int seqno; + int cmd; + int prevsys; /* buffer for skipping insert/delete pairs */ + int prevseq; + int prevcmd; + int keylen; /* tells if we have an unwritten key in buf, and how long*/ +#endif + void *encode_handle; + void *decode_handle; + char buf[ENCODE_BUFLEN]; +}; + +static void encode_key_init (struct encode_info *i); +static void encode_key_write (char *k, struct encode_info *i, FILE *outf); +static void encode_key_flush (struct encode_info *i, FILE *outf); + #define USE_SHELLSORT 0 #if USE_SHELLSORT @@ -81,18 +107,15 @@ static void logRecord (ZebraHandle zh) } } +static void extract_add_index_string (RecWord *p, const char *str, int length); + static void extract_set_store_data_prepare(struct recExtractCtrl *p); static void extract_init (struct recExtractCtrl *p, RecWord *w) { w->zebra_maps = p->zebra_maps; w->seqno = 1; -#if NATTR -#else - w->attrSet = VAL_BIB1; - w->attrUse = 1016; -#endif - w->index_name = 0; + w->index_name = "any"; w->index_type = 'w'; w->extractCtrl = p; w->record_id = 0; @@ -101,17 +124,22 @@ static void extract_init (struct recExtractCtrl *p, RecWord *w) static void searchRecordKey(ZebraHandle zh, zebra_rec_keys_t reckeys, - int attrSetS, int attrUseS, + const char *index_name, const char **ws, int ws_length) { int i; - int ch; + int ch = -1; for (i = 0; ireg->zei, - attrSetS, attrUseS); + if (ch < 0) + ch = zebraExplain_lookup_attr_str(zh->reg->zei, '0', index_name); + if (ch < 0) + ch = zebraExplain_lookup_attr_str(zh->reg->zei, 'p', index_name); + if (ch < 0) + ch = zebraExplain_lookup_attr_str(zh->reg->zei, 'w', index_name); + if (ch < 0) return ; @@ -227,9 +255,7 @@ static char *fileMatchStr (ZebraHandle zh, { const char *ws[32]; char attset_str[64], attname_str[64]; - data1_attset *attset; int i; - int attSet = 1, attUse = 1; int first = 1; for (s++; strchr(FILE_MATCH_BLANK, *s); s++) @@ -242,7 +268,9 @@ static char *fileMatchStr (ZebraHandle zh, for (; strchr(FILE_MATCH_BLANK, *s); s++) ; - if (*s == ',') + if (*s != ',') + strcpy(attname_str, attset_str); + else { for (s++; strchr(FILE_MATCH_BLANK, *s); s++) ; @@ -252,18 +280,8 @@ static char *fileMatchStr (ZebraHandle zh, attname_str[i++] = *s; attname_str[i] = '\0'; } - - if ((attset = data1_get_attset (zh->reg->dh, attset_str))) - { - data1_att *att; - attSet = attset->reference; - att = data1_getattbyname(zh->reg->dh, attset, attname_str); - if (att) - attUse = att->value; - else - attUse = atoi (attname_str); - } - searchRecordKey (zh, reckeys, attSet, attUse, ws, 32); + + searchRecordKey (zh, reckeys, attname_str, ws, 32); if (*s != ')') { @@ -379,6 +397,16 @@ static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl) ctrl->flagShowRecords = !zh->m_flag_rw; } +static void all_matches_add(struct recExtractCtrl *ctrl) +{ + RecWord word; + extract_init(ctrl, &word); + word.index_name = "allrecords"; + word.index_type = 'w'; + word.seqno = 1; + extract_add_index_string (&word, "", 0); +} + static ZEBRA_RES file_extract_record(ZebraHandle zh, SYSNO *sysno, const char *fname, int deleteFlag, @@ -387,6 +415,7 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, RecType recType, void *recTypeClientData) { + const char *match_str_to_print = ""; RecordAttr *recordAttr; int r; const char *matchStr = 0; @@ -408,11 +437,7 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, /* we are going to read from a file, so prepare the extraction */ zebra_rec_keys_reset(zh->reg->keys); -#if NATTR zebra_rec_keys_reset(zh->reg->sortKeys); -#else - zh->reg->sortKeys.buf_used = 0; -#endif recordOffset = fi->file_moffset; extractCtrl.handle = zh; extractCtrl.offset = fi->file_moffset; @@ -471,11 +496,18 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, } return ZEBRA_FAIL; } + all_matches_add(&extractCtrl); if (extractCtrl.match_criteria[0]) - matchStr = extractCtrl.match_criteria; + matchStr = extractCtrl.match_criteria; } - /* perform match if sysno not known and if match criteria is specified */ + /* if matchStr is set now - we assume it's printable . + For internal matchStr (see below) we don't print */ + if (matchStr) + match_str_to_print = matchStr; + + /* perform internal match if sysno not known and if match criteria is + specified already */ if (!sysno) { sysnotmp = 0; @@ -488,6 +520,11 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, if (!matchStr) { yaz_log(YLOG_WARN, "Bad match criteria"); + + if (zebra_rec_keys_empty(zh->reg->keys)) + { + yaz_log(YLOG_WARN, "And no index keys"); + } return ZEBRA_FAIL; } } @@ -532,17 +569,12 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, *sysno = rec->sysno; if (zh->records_processed < zh->m_file_verbose_limit) - if (matchStr) + { yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T " " ZINT_FORMAT " %s" , zh->m_record_type, - fname, recordOffset, *sysno, matchStr); - else - yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T - " " ZINT_FORMAT , - zh->m_record_type, - fname, recordOffset, *sysno); - + fname, recordOffset, *sysno, match_str_to_print); + } recordAttr = rec_init_attr (zh->reg->zei, rec); recordAttr->staticrank = extractCtrl.staticrank; @@ -554,11 +586,7 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, } -#if NATTR extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); -#else - extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); -#endif extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); zh->records_inserted++; @@ -568,11 +596,7 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, /* record already exists */ zebra_rec_keys_t delkeys = zebra_rec_keys_open(); -#if NATTR zebra_rec_keys_t sortKeys = zebra_rec_keys_open(); -#else - struct sortKeys sortKeys; -#endif rec = rec_get (zh->reg->records, *sysno); assert (rec); @@ -584,18 +608,11 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, rec->size[recInfo_delKeys], 0); -#if NATTR zebra_rec_keys_set_buf(sortKeys, rec->info[recInfo_sortKeys], rec->size[recInfo_sortKeys], 0); extract_flushSortKeys (zh, *sysno, 0, sortKeys); -#else - sortKeys.buf_used = rec->size[recInfo_sortKeys]; - sortKeys.buf = rec->info[recInfo_sortKeys]; - extract_flushSortKeys (zh, *sysno, 0, &sortKeys); -#endif - extract_flushRecordKeys (zh, *sysno, 0, delkeys, recordAttr->staticrank); /* old values */ if (deleteFlag) @@ -611,19 +628,12 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, else { if (zh->records_processed < zh->m_file_verbose_limit) - if (matchStr) + { yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T " " ZINT_FORMAT " %s" , zh->m_record_type, - fname, recordOffset, *sysno, matchStr); - else - yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T - " " ZINT_FORMAT , - zh->m_record_type, - fname, recordOffset, *sysno); - - - + fname, recordOffset, *sysno, match_str_to_print); + } zh->records_deleted++; if (matchStr) { @@ -640,31 +650,20 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, { /* flush new keys for sort&search etc */ if (zh->records_processed < zh->m_file_verbose_limit) - if (matchStr) - yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T - " " ZINT_FORMAT " %s" , - zh->m_record_type, - fname, recordOffset, *sysno, matchStr); - else - yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T - " " ZINT_FORMAT , - zh->m_record_type, - fname, recordOffset, *sysno); - + { + yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T + " " ZINT_FORMAT " %s" , + zh->m_record_type, + fname, recordOffset, *sysno, match_str_to_print); + } recordAttr->staticrank = extractCtrl.staticrank; -#if NATTR extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); -#else - extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); -#endif extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); zh->records_updated++; } zebra_rec_keys_close(delkeys); -#if NATTR zebra_rec_keys_close(sortKeys); -#endif } /* update file type */ xfree (rec->info[recInfo_fileType]); @@ -693,16 +692,9 @@ static ZEBRA_RES file_extract_record(ZebraHandle zh, /* update sort keys */ xfree (rec->info[recInfo_sortKeys]); -#if NATTR zebra_rec_keys_get_buf(zh->reg->sortKeys, &rec->info[recInfo_sortKeys], &rec->size[recInfo_sortKeys]); -#else - rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; - rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; - zh->reg->sortKeys.buf = NULL; - zh->reg->sortKeys.buf_max = 0; -#endif /* save file size of original record */ zebraExplain_recordBytesIncrement (zh->reg->zei, @@ -927,12 +919,8 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, extractCtrl.fh = &fc; zebra_rec_keys_reset(zh->reg->keys); - -#if NATTR zebra_rec_keys_reset(zh->reg->sortKeys); -#else - zh->reg->sortKeys.buf_used = 0; -#endif + if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0])) { if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0], @@ -994,6 +982,8 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, return ZEBRA_FAIL; } + all_matches_add(&extractCtrl); + if (extractCtrl.match_criteria[0]) match_criteria = extractCtrl.match_criteria; @@ -1062,15 +1052,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, } -#if NATTR extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); -#else - extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); -#endif - -#if 0 - print_rec_keys(zh, zh->reg->keys); -#endif extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); zh->records_inserted++; @@ -1079,12 +1061,7 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, { /* record already exists */ zebra_rec_keys_t delkeys = zebra_rec_keys_open(); -#if NATTR zebra_rec_keys_t sortKeys = zebra_rec_keys_open(); -#else - struct sortKeys sortKeys; -#endif - if (!allow_update) { yaz_log (YLOG_LOG, "skipped %s %s %ld", @@ -1102,21 +1079,12 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], 0); -#if NATTR zebra_rec_keys_set_buf(sortKeys, rec->info[recInfo_sortKeys], rec->size[recInfo_sortKeys], 0); -#else - sortKeys.buf_used = rec->size[recInfo_sortKeys]; - sortKeys.buf = rec->info[recInfo_sortKeys]; -#endif -#if NATTR extract_flushSortKeys (zh, *sysno, 0, sortKeys); -#else - extract_flushSortKeys (zh, *sysno, 0, &sortKeys); -#endif extract_flushRecordKeys (zh, *sysno, 0, delkeys, recordAttr->staticrank); if (delete_flag) @@ -1152,19 +1120,13 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, yaz_log (YLOG_LOG, "update %s %s %ld", recordType, pr_fname, (long) recordOffset); recordAttr->staticrank = extractCtrl.staticrank; -#if NATTR extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys); -#else - extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); -#endif extract_flushRecordKeys (zh, *sysno, 1, zh->reg->keys, recordAttr->staticrank); zh->records_updated++; } zebra_rec_keys_close(delkeys); -#if NATTR zebra_rec_keys_close(sortKeys); -#endif } /* update file type */ xfree (rec->info[recInfo_fileType]); @@ -1192,16 +1154,9 @@ ZEBRA_RES buffer_extract_record(ZebraHandle zh, /* update sort keys */ xfree (rec->info[recInfo_sortKeys]); -#if NATTR zebra_rec_keys_get_buf(zh->reg->sortKeys, &rec->info[recInfo_sortKeys], &rec->size[recInfo_sortKeys]); -#else - rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; - rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; - zh->reg->sortKeys.buf = NULL; - zh->reg->sortKeys.buf_max = 0; -#endif /* save file size of original record */ zebraExplain_recordBytesIncrement (zh->reg->zei, @@ -1271,12 +1226,8 @@ int explain_extract (void *handle, Record rec, data1_node *n) } zebra_rec_keys_reset(zh->reg->keys); - -#if NATTR zebra_rec_keys_reset(zh->reg->sortKeys); -#else - zh->reg->sortKeys.buf_used = 0; -#endif + extractCtrl.init = extract_init; extractCtrl.tokenAdd = extract_token_add; extractCtrl.schemaAdd = extract_schema_add; @@ -1299,36 +1250,23 @@ int explain_extract (void *handle, Record rec, data1_node *n) { zebra_rec_keys_t delkeys = zebra_rec_keys_open(); -#if NATTR zebra_rec_keys_t sortkeys = zebra_rec_keys_open(); -#else - struct sortKeys sortkeys; -#endif zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], 0); extract_flushRecordKeys (zh, rec->sysno, 0, delkeys, 0); zebra_rec_keys_close(delkeys); -#if NATTR + zebra_rec_keys_set_buf(sortkeys, rec->info[recInfo_sortKeys], rec->size[recInfo_sortKeys], 0); extract_flushSortKeys (zh, rec->sysno, 0, sortkeys); zebra_rec_keys_close(sortkeys); -#else - sortkeys.buf_used = rec->size[recInfo_sortKeys]; - sortkeys.buf = rec->info[recInfo_sortKeys]; - extract_flushSortKeys (zh, rec->sysno, 0, &sortkeys); -#endif } extract_flushRecordKeys (zh, rec->sysno, 1, zh->reg->keys, 0); -#if NATTR extract_flushSortKeys (zh, rec->sysno, 1, zh->reg->sortKeys); -#else - extract_flushSortKeys (zh, rec->sysno, 1, &zh->reg->sortKeys); -#endif xfree (rec->info[recInfo_delKeys]); zebra_rec_keys_get_buf(zh->reg->keys, @@ -1336,20 +1274,65 @@ int explain_extract (void *handle, Record rec, data1_node *n) &rec->size[recInfo_delKeys]); xfree (rec->info[recInfo_sortKeys]); -#if NATTR zebra_rec_keys_get_buf(zh->reg->sortKeys, &rec->info[recInfo_sortKeys], &rec->size[recInfo_sortKeys]); -#else - rec->size[recInfo_sortKeys] = zh->reg->sortKeys.buf_used; - rec->info[recInfo_sortKeys] = zh->reg->sortKeys.buf; - zh->reg->sortKeys.buf = NULL; - zh->reg->sortKeys.buf_max = 0; -#endif return 0; } +void extract_rec_keys_adjust(ZebraHandle zh, int is_insert, + zebra_rec_keys_t reckeys) +{ + ZebraExplainInfo zei = zh->reg->zei; + struct ord_stat { + int no; + int ord; + struct ord_stat *next; + }; + + if (zebra_rec_keys_rewind(reckeys)) + { + struct ord_stat *ord_list = 0; + struct ord_stat *p; + size_t slen; + const char *str; + struct it_key key_in; + while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) + { + int ord = key_in.mem[0]; + + for (p = ord_list; p ; p = p->next) + if (p->ord == ord) + { + p->no++; + break; + } + if (!p) + { + p = xmalloc(sizeof(*p)); + p->no = 1; + p->ord = ord; + p->next = ord_list; + ord_list = p; + } + } + + p = ord_list; + while (p) + { + struct ord_stat *p1 = p; + + if (is_insert) + zebraExplain_ord_adjust_occurrences(zei, p->ord, p->no, 1); + else + zebraExplain_ord_adjust_occurrences(zei, p->ord, - p->no, -1); + p = p->next; + xfree(p1); + } + } +} + void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, int cmd, zebra_rec_keys_t reckeys, @@ -1357,6 +1340,8 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, { ZebraExplainInfo zei = zh->reg->zei; + extract_rec_keys_adjust(zh, cmd, reckeys); + if (!zh->reg->key_buf) { int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8")); @@ -1403,7 +1388,7 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, zh->reg->key_buf_used += key_SU_encode(ch, (char*)zh->reg->key_buf + zh->reg->key_buf_used); - + /* copy the 0-terminated stuff from str to output */ memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, str, slen); zh->reg->key_buf_used += slen; @@ -1595,8 +1580,7 @@ ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, ord = key.mem[0]; zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, - 0/* db */, 0/* set */, 0/* use */, - 0 /* string_index */); + 0/* db */, 0 /* string_index */); assert(index_type); zebra_term_untrans_iconv(zh, nmem, index_type, &dst_term, str); @@ -1625,7 +1609,7 @@ void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys) assert(key.len <= 4 && key.len > 2); zebraExplain_lookup_ord(zh->reg->zei, - key.mem[0], &index_type, &db, 0, 0, 0); + key.mem[0], &index_type, &db, 0); seqno = (int) key.mem[key.len-1]; @@ -1637,7 +1621,7 @@ void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys) } } -void extract_add_index_string (RecWord *p, const char *str, int length) +void extract_add_index_string(RecWord *p, const char *str, int length) { struct it_key key; @@ -1645,24 +1629,13 @@ void extract_add_index_string (RecWord *p, const char *str, int length) ZebraExplainInfo zei = zh->reg->zei; int ch; - if (p->index_name) - { - ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name); - if (ch < 0) - ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name); - } - else - { -#if NATTR - return; -#else - ch = zebraExplain_lookup_attr_su(zei, p->index_type, - p->attrSet, p->attrUse); - if (ch < 0) - ch = zebraExplain_add_attr_su(zei, p->index_type, - p->attrSet, p->attrUse); -#endif - } + if (!p->index_name) + return; + + ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name); + if (ch < 0) + ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name); + key.len = 4; key.mem[0] = ch; key.mem[1] = p->record_id; @@ -1689,8 +1662,7 @@ void extract_add_index_string (RecWord *p, const char *str, int length) zebra_rec_keys_write(zh->reg->keys, str, length, &key); } -#if NATTR -static void extract_add_sort_string (RecWord *p, const char *str, int length) +static void extract_add_sort_string(RecWord *p, const char *str, int length) { struct it_key key; @@ -1698,61 +1670,36 @@ static void extract_add_sort_string (RecWord *p, const char *str, int length) ZebraExplainInfo zei = zh->reg->zei; int ch; - if (p->index_name) - { - ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name); - if (ch < 0) - ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name); - } - else - { - return; - } + if (!p->index_name) + return; + + ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name); + if (ch < 0) + ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name); key.len = 4; key.mem[0] = ch; key.mem[1] = p->record_id; key.mem[2] = p->section_id; key.mem[3] = p->seqno; - zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key); -} -#else -static void extract_add_sort_string (RecWord *p, const char *str, int length) -{ - ZebraHandle zh = p->extractCtrl->handle; - struct sortKeys *sk = &zh->reg->sortKeys; - int off = 0; - - while (off < sk->buf_used) - { - int set, use, slen; - - off += key_SU_decode(&set, (unsigned char *) sk->buf + off); - off += key_SU_decode(&use, (unsigned char *) sk->buf + off); - off += key_SU_decode(&slen, (unsigned char *) sk->buf + off); - off += slen; - if (p->attrSet == set && p->attrUse == use) - return; - } - assert (off == sk->buf_used); - - if (sk->buf_used + IT_MAX_WORD > sk->buf_max) +#if 0 + if (1) { - char *b; - - b = (char *) xmalloc (sk->buf_max += 128000); - if (sk->buf_used > 0) - memcpy (b, sk->buf, sk->buf_used); - xfree (sk->buf); - sk->buf = b; + char strz[80]; + int i; + + strz[0] = 0; + for (i = 0; iattrSet, p->attrUse, p->record_id, p->section_id, p->seqno, + strz); } - off += key_SU_encode(p->attrSet, sk->buf + off); - off += key_SU_encode(p->attrUse, sk->buf + off); - off += key_SU_encode(length, sk->buf + off); - memcpy (sk->buf + off, str, length); - sk->buf_used = off + length; -} #endif + zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key); +} void extract_add_string (RecWord *p, const char *string, int length) { @@ -1926,88 +1873,37 @@ void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid) zebraExplain_addSchema (zh->reg->zei, oid); } -#if NATTR -#error not done yet with zebra_rec_keys_t void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, - int cmd, struct recKeys *reckeys) + int cmd, zebra_rec_keys_t reckeys) { - SortIdx sortIdx = zh->reg->sortIdx; - void *decode_handle = iscz1_start(); - int off = 0; - int ch = 0; - - while (off < reckeys->buf_used) + if (zebra_rec_keys_rewind(reckeys)) { - const char *src = reckeys->buf + off; - struct it_key key; - char *dst = (char*) &key; - - iscz1_decode(decode_handle, &dst, &src); - assert(key.len == 4); - - ch = (int) key.mem[0]; /* ordinal for field/use/attribute */ - - sortIdx_type(sortIdx, ch); - if (cmd == 1) - sortIdx_add(sortIdx, src, strlen(src)); - else - sortIdx_add(sortIdx, "", 1); - - src += strlen(src); - src++; - - off = src - reckeys->buf; - } - assert (off == reckeys->buf_used); - iscz1_stop(decode_handle); -} -#else -void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, - int cmd, struct sortKeys *sk) -{ - SortIdx sortIdx = zh->reg->sortIdx; - int off = 0; + SortIdx sortIdx = zh->reg->sortIdx; + size_t slen; + const char *str; + struct it_key key_in; - sortIdx_sysno (sortIdx, sysno); + sortIdx_sysno (sortIdx, sysno); - while (off < sk->buf_used) - { - int set, use, slen; - - off += key_SU_decode(&set, (unsigned char *) sk->buf + off); - off += key_SU_decode(&use, (unsigned char *) sk->buf + off); - off += key_SU_decode(&slen, (unsigned char *) sk->buf + off); - - sortIdx_type(sortIdx, use); - if (cmd == 1) - sortIdx_add(sortIdx, sk->buf + off, slen); - else - sortIdx_add(sortIdx, "", 1); - off += slen; + while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) + { + int ord = (int) key_in.mem[0]; + + sortIdx_type(sortIdx, ord); + if (cmd == 1) + sortIdx_add(sortIdx, str, slen); + else + sortIdx_add(sortIdx, "", 1); + } } } -#endif void encode_key_init (struct encode_info *i) { - i->sysno = 0; - i->seqno = 0; - i->cmd = -1; - i->prevsys=0; - i->prevseq=0; - i->prevcmd=-1; - i->keylen=0; i->encode_handle = iscz1_start(); i->decode_handle = iscz1_start(); } -#define OLDENCODE 1 - -#ifdef OLDENCODE -/* this is the old encode_key_write - * may be deleted once we are confident that the new works - * HL 15-oct-2002 - */ void encode_key_write (char *k, struct encode_info *i, FILE *outf) { struct it_key key; @@ -2058,107 +1954,6 @@ void encode_key_flush (struct encode_info *i, FILE *outf) iscz1_stop(i->decode_handle); } -#else - -/* new encode_key_write - * The idea is to buffer one more key, and compare them - * If we are going to delete and insert the same key, - * we may as well not bother. Should make a difference in - * updates with small modifications (appending to a mbox) - */ -void encode_key_write (char *k, struct encode_info *i, FILE *outf) -{ - struct it_key key; - char *bp; - - if (*k) /* first time for new key */ - { - bp = i->buf; - while ((*bp++ = *k++)) - ; - i->keylen= bp - i->buf -1; - assert(i->keylen+1+sizeof(struct it_key) < ENCODE_BUFLEN); - } - else - { - bp=i->buf + i->keylen; - *bp++=0; - k++; - } - - memcpy (&key, k+1, sizeof(struct it_key)); - if (0==i->prevsys) /* no previous filter, fill up */ - { - i->prevsys=key.sysno; - i->prevseq=key.seqno; - i->prevcmd=*k; - } - else if ( (i->prevsys==key.sysno) && - (i->prevseq==key.seqno) && - (i->prevcmd!=*k) ) - { /* same numbers, diff cmd, they cancel out */ - i->prevsys=0; - } - else - { /* different stuff, write previous, move buf */ - bp = encode_key_int ( (i->prevsys - i->sysno) * 2 + i->prevcmd, bp); - if (i->sysno != i->prevsys) - { - i->sysno = i->prevsys; - i->seqno = 0; - } - else if (!i->seqno && !i->prevseq && i->cmd == i->prevcmd) - { - return; /* ??? Filters some sort of duplicates away */ - /* ??? Can this ever happen -H 15oct02 */ - } - bp = encode_key_int (i->prevseq - i->seqno, bp); - i->seqno = i->prevseq; - i->cmd = i->prevcmd; - if (fwrite (i->buf, bp - i->buf, 1, outf) != 1) - { - yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite"); - exit (1); - } - i->keylen=0; /* ok, it's written, forget it */ - i->prevsys=key.sysno; - i->prevseq=key.seqno; - i->prevcmd=*k; - } -} - -void encode_key_flush (struct encode_info *i, FILE *outf) -{ /* flush the last key from i */ - char *bp =i->buf + i->keylen; - if (0==i->prevsys) - { - return; /* nothing to flush */ - } - *bp++=0; - bp = encode_key_int ( (i->prevsys - i->sysno) * 2 + i->prevcmd, bp); - if (i->sysno != i->prevsys) - { - i->sysno = i->prevsys; - i->seqno = 0; - } - else if (!i->seqno && !i->prevseq && i->cmd == i->prevcmd) - { - return; /* ??? Filters some sort of duplicates away */ - /* ??? Can this ever happen -H 15oct02 */ - } - bp = encode_key_int (i->prevseq - i->seqno, bp); - i->seqno = i->prevseq; - i->cmd = i->prevcmd; - if (fwrite (i->buf, bp - i->buf, 1, outf) != 1) - { - yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite"); - exit (1); - } - i->keylen=0; /* ok, it's written, forget it */ - i->prevsys=0; /* forget the values too */ - i->prevseq=0; -} -#endif /* * Local variables: * c-basic-offset: 4