-/* $Id: extract.c,v 1.263 2007-10-29 09:25:40 adam Exp $
+/* $Id: extract.c,v 1.264 2007-10-29 13:43:57 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
*/
+/** \file
+ \brief indexes records and extracts tokens for indexing and sorting
+*/
+
#include <stdio.h>
#include <assert.h>
#include <ctype.h>
#endif
#include <fcntl.h>
+
#include "index.h"
#include "orddict.h"
#include <direntz.h>
#include <charmap.h>
+#include <yaz/snprintf.h>
static int log_level_extract = 0;
static int log_level_details = 0;
int cmd, zebra_rec_keys_t skp);
static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid);
static void extract_token_add(RecWord *p);
+static void extract_token_add2(RecWord *p);
static void check_log_limit(ZebraHandle zh)
{
stream->endf(stream, &null_offset);;
extractCtrl.init = extract_init;
- extractCtrl.tokenAdd = extract_token_add;
+ if (zh->reg->index_types)
+ {
+ extractCtrl.tokenAdd = extract_token_add2;
+ }
+ else
+ {
+ extractCtrl.tokenAdd = extract_token_add;
+ }
extractCtrl.schemaAdd = extract_schema_add;
extractCtrl.dh = zh->reg->dh;
extractCtrl.handle = zh;
extract_add_string(p, buf, i);
}
+static void extract_token_add2_index(ZebraHandle zh, zebra_index_type_t type,
+ RecWord *p)
+{ /* Tokenizes the term p->term_buf (p->term_len bytes) with the given index
+     type and writes one key per token to zh->reg->keys.
+
+     The index identified by (cat, p->index_type, p->index_name) is looked up
+     in zh->reg->zei, and added when the lookup returns a negative value.
+     This happens before the loop, i.e. also when no token is produced.
+
+     p->seqno is incremented once for every token written, so the caller's
+     RecWord is modified by this call. */
+ struct it_key key;
+ const char *res_buf = 0; /* token text, filled in through &res_buf */
+ size_t res_len = 0; /* token length, filled in through &res_len */
+ int r = zebra_index_type_tokenize(type, p->term_buf, p->term_len,
+ &res_buf, &res_len); /* first call supplies the input buffer */
+ int cat = zinfo_index_category_index;
+ int ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, p->index_type, p->index_name);
+ if (ch < 0) /* lookup failed: add the index and use the value returned */
+ ch = zebraExplain_add_attr_str(zh->reg->zei, cat, p->index_type, p->index_name);
+ while (r) /* runs for as long as the tokenizer returns non-zero */
+ {
+ int i = 0;
+ key.mem[i++] = ch;
+ key.mem[i++] = p->record_id;
+ key.mem[i++] = p->section_id;
+ /* the segment slot is part of the key only when segment indexing is on */
+ if (zh->m_segment_indexing)
+ key.mem[i++] = p->segment;
+ key.mem[i++] = p->seqno;
+ key.len = i; /* slots filled: 4, or 5 when the segment is included */
+ /* NOTE(review): every token is logged at YLOG_LOG - looks like debug output, confirm it is intended */
+ yaz_log(YLOG_LOG, "keys_write %.*s", (int) res_len, res_buf);
+ zebra_rec_keys_write(zh->reg->keys, res_buf, res_len, &key);
+
+ p->seqno++;
+ r = zebra_index_type_tokenize(type, 0, 0, &res_buf, &res_len); /* 0/0 input: presumably asks for the next token of the same term - confirm against the tokenizer's contract */
+ }
+}
+/** \brief token handler that dispatches on the index type of the token
+ \param p token data to be indexed
+
+ Assigned to extractCtrl.tokenAdd instead of extract_token_add when
+ zh->reg->index_types is set.
+
+ The one-character p->index_type is turned into a string and looked up in
+ zh->reg->index_types. If a type is found and it is an index type, the
+ token is passed to extract_token_add2_index. If it is a sort type, or if
+ no type is found, nothing is done.
+*/
+static void extract_token_add2(RecWord *p)
+{
+ zebra_index_type_t type;
+ ZebraHandle zh = p->extractCtrl->handle;
+ char type_tmp[2]; /* p->index_type as a NUL-terminated one-character string */
+ type_tmp[0] = p->index_type;
+ type_tmp[1] = '\0';
+ type = zebra_index_type_get(zh->reg->index_types, type_tmp);
+ if (type) /* a zero result from the lookup means the token is dropped */
+ {
+ if (zebra_index_type_is_index(type))
+ {
+ extract_token_add2_index(zh, type, p);
+ }
+ else if (zebra_index_type_is_sort(type))
+ {
+ ; /* empty statement: nothing is written for sort types here */
+
+ }
+ }
+}
+
+/** \brief top-level indexing handler for recctrl system
+ \param p token data to be indexed
+
+ Call sequence:
+ extract_token_add
+ zebra_add_{in}_complete
+ extract_add_string
+
+ extract_add_index_string
+ or
+ extract_add_sort_string
+ or
+ extract_add_staticrank_string
+
+*/
static void extract_token_add(RecWord *p)
{
ZebraHandle zh = p->extractCtrl->handle;