X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=f70d8249dd764fc3179fae1b742f52aa4ce1a730;hb=4c45f1ecb67feb1cb08457b7129d94d70e0af293;hp=e092a9b3f7f0e0bb6ad268b5b336eb622fda9551;hpb=af102b1fb451ba27bfa7343528c4240b3ab3a80b;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index e092a9b..f70d824 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,5 +1,5 @@ /* This file is part of the Zebra server. - Copyright (C) 1995-2008 Index Data + Copyright (C) 1994-2011 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -47,11 +47,18 @@ static int log_level_initialized = 0; /* eventually this the 0-case code will be removed */ #define FLUSH2 1 -void extract_flush_record_keys2(ZebraHandle zh, zint sysno, - zebra_rec_keys_t ins_keys, - zint ins_rank, - zebra_rec_keys_t del_keys, - zint del_rank); +#if FLUSH2 +static void extract_flush_record_keys2(ZebraHandle zh, zint sysno, + zebra_rec_keys_t ins_keys, + zint ins_rank, + zebra_rec_keys_t del_keys, + zint del_rank); +#else +static void extract_flush_record_keys(ZebraHandle zh, zint sysno, + int cmd, + zebra_rec_keys_t reckeys, + zint staticrank); +#endif static void zebra_init_log_level(void) { @@ -64,6 +71,21 @@ static void zebra_init_log_level(void) } } +static WRBUF wrbuf_hex_str(const char *cstr) +{ + size_t i; + WRBUF w = wrbuf_alloc(); + for (i = 0; cstr[i]; i++) + { + if (cstr[i] < ' ' || cstr[i] > 126) + wrbuf_printf(w, "\\%02X", cstr[i] & 0xff); + else + wrbuf_putc(w, cstr[i]); + } + return w; +} + + static void extract_flush_sort_keys(ZebraHandle zh, zint sysno, int cmd, zebra_rec_keys_t skp); static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid); @@ -287,7 +309,7 @@ static void snippet_token_add(RecWord *p) { struct snip_rec_info *h = p->extractCtrl->handle; ZebraHandle zh = h->zh; - zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, p->index_type); + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, p->index_type); if (zm) { @@ -440,9 +462,6 @@ static char *get_match_from_spec(ZebraHandle zh, attname_str[i++] = *s; attname_str[i] = '\0'; } - - searchRecordKey(zh, reckeys, attname_str, ws, 32); - if (*s != ')') { yaz_log(YLOG_WARN, "Missing ) in match criteria %s in group %s", @@ -451,6 +470,20 @@ static char *get_match_from_spec(ZebraHandle zh, } s++; + searchRecordKey(zh, reckeys, attname_str, ws, 32); + if (0) /* for debugging */ + { + for (i = 0; i<32; i++) + { + if (ws[i]) + { + WRBUF w = wrbuf_hex_str(ws[i]); + yaz_log(YLOG_LOG, "ws[%d] = %s", i, wrbuf_cstr(w)); + wrbuf_destroy(w); + } + } + } + for (i = 0; i<32; i++) if (ws[i]) { @@ -534,6 +567,14 @@ static char *get_match_from_spec(ZebraHandle zh, return NULL; } *dst = '\0'; + + if (0) /* for debugging */ + { + WRBUF w = wrbuf_hex_str(dstBuf); + yaz_log(YLOG_LOG, "get_match_from_spec %s", wrbuf_cstr(w)); + wrbuf_destroy(w); + } + return dstBuf; } @@ -570,10 +611,10 @@ static void all_matches_add(struct recExtractCtrl *ctrl, zint record_id, "", 0); } +/* forward declaration */ ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh, struct ZebraRecStream *stream, enum zebra_recctrl_action_t action, - int test_mode, const char *recordType, zint *sysno, const char *match_criteria, @@ -682,7 +723,6 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, } r = zebra_extract_records_stream(zh, streamp, action, - 0, /* tst_mode */ zh->m_record_type, sysno, 0, /*match_criteria */ @@ -704,7 +744,6 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, const char *buf, size_t buf_size, enum zebra_recctrl_action_t action, - int test_mode, const char *recordType, zint *sysno, const char *match_criteria, @@ -746,7 +785,6 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, res = zebra_extract_records_stream(zh, &stream, action, - test_mode, recordType, sysno, match_criteria, @@ -756,69 +794,17 @@ ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, return res; } -ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh, - struct ZebraRecStream *stream, - enum zebra_recctrl_action_t action, - int test_mode, - const char *recordType, - zint *sysno, - const char *match_criteria, - const char *fname, - RecType recType, - void *recTypeClientData) -{ - ZEBRA_RES res = ZEBRA_OK; - while (1) - { - int more = 0; - res = zebra_extract_record_stream(zh, stream, - action, - test_mode, - recordType, - sysno, - match_criteria, - fname, - recType, recTypeClientData, &more); - if (!more) - { - res = ZEBRA_OK; - break; - } - if (res != ZEBRA_OK) - break; - if (sysno) - break; - } - return res; -} - - -static WRBUF wrbuf_hex_str(const char *cstr) -{ - size_t i; - WRBUF w = wrbuf_alloc(); - for (i = 0; cstr[i]; i++) - { - if (cstr[i] < ' ' || cstr[i] > 126) - wrbuf_printf(w, "\\%02X", cstr[i] & 0xff); - else - wrbuf_putc(w, cstr[i]); - } - return w; -} - -ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, - struct ZebraRecStream *stream, - enum zebra_recctrl_action_t action, - int test_mode, - const char *recordType, - zint *sysno, - const char *match_criteria, - const char *fname, - RecType recType, - void *recTypeClientData, - int *more) - +static ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, + struct ZebraRecStream *stream, + enum zebra_recctrl_action_t action, + const char *recordType, + zint *sysno, + const char *match_criteria, + const char *fname, + RecType recType, + void *recTypeClientData, + int *more) + { zint sysno0 = 0; RecordAttr *recordAttr; @@ -917,14 +903,25 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, } *more = 1; + + if (zh->m_flag_rw == 0) + { + yaz_log(YLOG_LOG, "test %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) start_offset); + /* test mode .. Do not perform match */ + return ZEBRA_OK; + } + if (!sysno) { sysno = &sysno0; - - if (match_criteria && *match_criteria) { + + if (match_criteria && *match_criteria) matchStr = match_criteria; - } else { - if (zh->m_record_id && *zh->m_record_id) { + else + { + if (zh->m_record_id && *zh->m_record_id) + { matchStr = get_match_from_spec(zh, zh->reg->keys, pr_fname, zh->m_record_id); if (!matchStr) @@ -933,6 +930,17 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, pr_fname, (zint) start_offset); return ZEBRA_FAIL; } + if (0 && matchStr) + { + WRBUF w = wrbuf_alloc(); + size_t i; + for (i = 0; i < strlen(matchStr); i++) + { + wrbuf_printf(w, "%02X", matchStr[i] & 0xff); + } + yaz_log(YLOG_LOG, "Got match %s", wrbuf_cstr(w)); + wrbuf_destroy(w); + } } } if (matchStr) @@ -956,21 +964,13 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, } } - if (zebra_rec_keys_empty(zh->reg->keys)) - { - /* the extraction process returned no information - the record - is probably empty - unless flagShowRecords is in use */ - if (test_mode) - return ZEBRA_OK; - } - if (! *sysno) { /* new record AKA does not exist already */ if (action == action_delete) { - yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType, - pr_fname, (zint) start_offset); + yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) start_offset); yaz_log(YLOG_WARN, "cannot delete record above (seems new)"); return ZEBRA_FAIL; } @@ -1210,6 +1210,52 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, return ZEBRA_OK; } +/** \brief extracts records from stream + \param zh Zebra Handle + \param stream stream that we read from + \param action (action_insert, action_replace, action_delete, ..) + \param recordType Record filter type "grs.xml", etc. + \param sysno pointer to sysno if already known; NULL otherwise + \param match_criteria (NULL if not already given) + \param fname filename that we read from (for logging purposes only) + \param recType record type + \param recTypeClientData client data for record type + \returns ZEBRA_OK for success; ZEBRA_FAIL for failure +*/ +ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh, + struct ZebraRecStream *stream, + enum zebra_recctrl_action_t action, + const char *recordType, + zint *sysno, + const char *match_criteria, + const char *fname, + RecType recType, + void *recTypeClientData) +{ + ZEBRA_RES res = ZEBRA_OK; + while (1) + { + int more = 0; + res = zebra_extract_record_stream(zh, stream, + action, + recordType, + sysno, + match_criteria, + fname, + recType, recTypeClientData, &more); + if (!more) + { + res = ZEBRA_OK; + break; + } + if (res != ZEBRA_OK) + break; + if (sysno) + break; + } + return res; +} + ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n) { ZebraHandle zh = (ZebraHandle) handle; @@ -1294,6 +1340,56 @@ ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n) return ZEBRA_OK; } +void zebra_it_key_str_dump(ZebraHandle zh, struct it_key *key, + const char *str, size_t slen, NMEM nmem, int level) +{ + char keystr[200]; /* room for zints to print */ + char *dst_term = 0; + int ord = CAST_ZINT_TO_INT(key->mem[0]); + const char *index_type; + int i; + const char *string_index; + + zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, + 0/* db */, &string_index); + assert(index_type); + zebra_term_untrans_iconv(zh, nmem, index_type, + &dst_term, str); + *keystr = '\0'; + for (i = 0; i < key->len; i++) + { + sprintf(keystr + strlen(keystr), ZINT_FORMAT " ", key->mem[i]); + } + + if (*str < CHR_BASE_CHAR) + { + int i; + char dst_buf[200]; /* room for special chars */ + + strcpy(dst_buf , "?"); + + if (!strcmp(str, "")) + strcpy(dst_buf, "alwaysmatches"); + if (!strcmp(str, FIRST_IN_FIELD_STR)) + strcpy(dst_buf, "firstinfield"); + else if (!strcmp(str, CHR_UNKNOWN)) + strcpy(dst_buf, "unknown"); + else if (!strcmp(str, CHR_SPACE)) + strcpy(dst_buf, "space"); + + for (i = 0; ireg->zei, ord, &index_type, - 0/* db */, &string_index); - assert(index_type); - zebra_term_untrans_iconv(zh, nmem, index_type, - &dst_term, str); - *keystr = '\0'; - for (i = 0; ireg->zei; int normal = 0; @@ -1483,7 +1536,50 @@ void extract_flush_record_keys2(ZebraHandle zh, zint sysno, } yaz_log(log_level_extract, "normal=%d optimized=%d", normal, optimized); } +#else +static void extract_flush_record_keys( + ZebraHandle zh, zint sysno, int cmd, + zebra_rec_keys_t reckeys, + zint staticrank) +{ + ZebraExplainInfo zei = zh->reg->zei; + extract_rec_keys_adjust(zh, cmd, reckeys); + + if (log_level_details) + { + yaz_log(log_level_details, "Keys for record " ZINT_FORMAT " %s", + sysno, cmd ? "insert" : "delete"); + extract_rec_keys_log(zh, cmd, reckeys, log_level_details); + } + + if (!zh->reg->key_block) + { + int mem = 1024*1024 * atoi( res_get_def( zh->res, "memmax", "8")); + const char *key_tmp_dir = res_get_def(zh->res, "keyTmpDir", "."); + int use_threads = atoi(res_get_def(zh->res, "threads", "1")); + zh->reg->key_block = key_block_create(mem, key_tmp_dir, use_threads); + } + zebraExplain_recordCountIncrement(zei, cmd ? 1 : -1); + +#if 0 + yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " cmd=%d", sysno, cmd); + print_rec_keys(zh, reckeys); +#endif + if (zebra_rec_keys_rewind(reckeys)) + { + size_t slen; + const char *str; + struct it_key key_in; + while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) + { + key_block_write(zh->reg->key_block, sysno, + &key_in, cmd, str, slen, + staticrank, zh->m_staticrank); + } + } +} +#endif ZEBRA_RES zebra_rec_keys_to_snippets1(ZebraHandle zh, zebra_rec_keys_t reckeys, @@ -1584,9 +1680,10 @@ static void extract_add_sort_string(RecWord *p, const char *str, int length) ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name); if (ch < 0) ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name); - key.len = 2; + key.len = 3; key.mem[0] = ch; key.mem[1] = p->record_id; + key.mem[2] = p->section_id; zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key); } @@ -1873,17 +1970,21 @@ void extract_flush_sort_keys(ZebraHandle zh, zint sysno, int cmd; struct sort_add_ent *next; WRBUF wrbuf; + zint sysno; + zint section_id; }; struct sort_add_ent *sort_ent_list = 0; - zebra_sort_sysno(si, sysno); while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) { int ord = CAST_ZINT_TO_INT(key_in.mem[0]); + zint filter_sysno = key_in.mem[1]; + zint section_id = key_in.mem[2]; struct sort_add_ent **e = &sort_ent_list; - while (*e && (*e)->ord != ord) - e = &(*e)->next; + for (; *e; e = &(*e)->next) + if ((*e)->ord == ord && section_id == (*e)->section_id) + break; if (!*e) { *e = nmem_malloc(nmem, sizeof(**e)); @@ -1891,6 +1992,8 @@ void extract_flush_sort_keys(ZebraHandle zh, zint sysno, (*e)->wrbuf = wrbuf_alloc(); (*e)->ord = ord; (*e)->cmd = cmd; + (*e)->sysno = filter_sysno ? filter_sysno : sysno; + (*e)->section_id = section_id; } wrbuf_write((*e)->wrbuf, str, slen); @@ -1898,14 +2001,20 @@ void extract_flush_sort_keys(ZebraHandle zh, zint sysno, } if (sort_ent_list) { + zint last_sysno = 0; struct sort_add_ent *e = sort_ent_list; for (; e; e = e->next) { + if (last_sysno != e->sysno) + { + zebra_sort_sysno(si, e->sysno); + last_sysno = e->sysno; + } zebra_sort_type(si, e->ord); if (e->cmd == 1) - zebra_sort_add(si, e->wrbuf); + zebra_sort_add(si, e->section_id, e->wrbuf); else - zebra_sort_delete(si); + zebra_sort_delete(si, e->section_id); wrbuf_destroy(e->wrbuf); } } @@ -1916,6 +2025,7 @@ void extract_flush_sort_keys(ZebraHandle zh, zint sysno, /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab