projects
/
idzebra-moved-to-github.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Sort facets by set frequency.
[idzebra-moved-to-github.git]
/
util
/
zebramap.c
diff --git
a/util/zebramap.c
b/util/zebramap.c
index
a951c89
..
fd62d4a
100644
(file)
--- a/
util/zebramap.c
+++ b/
util/zebramap.c
@@
-1,4
+1,4
@@
-/* $Id: zebramap.c,v 1.66 2007-11-07 10:24:28 adam Exp $
+/* $Id: zebramap.c,v 1.72 2007-11-15 08:53:26 adam Exp $
Copyright (C) 1995-2007
Index Data ApS
Copyright (C) 1995-2007
Index Data ApS
@@
-28,8
+28,8
@@
#include <attrfind.h>
#include <yaz/yaz-util.h>
#include <attrfind.h>
#include <yaz/yaz-util.h>
-#if HAVE_ICU
-#include <yaz/icu_I18N.h>
+#if YAZ_HAVE_ICU
+#include <yaz/icu.h>
#endif
#include <zebramap.h>
#endif
#include <zebramap.h>
@@
-47,6
+47,7
@@
struct zebra_map {
int first_in_field;
int type;
int use_chain;
int first_in_field;
int type;
int use_chain;
+ int debug;
union {
struct {
int entry_size;
union {
struct {
int entry_size;
@@
-54,15
+55,15
@@
struct zebra_map {
} u;
chrmaptab maptab;
const char *maptab_name;
} u;
chrmaptab maptab;
const char *maptab_name;
- const char *locale;
zebra_maps_t zebra_maps;
#if YAZ_HAVE_XML2
xmlDocPtr doc;
#endif
zebra_maps_t zebra_maps;
#if YAZ_HAVE_XML2
xmlDocPtr doc;
#endif
-#if HAVE_ICU
+#if YAZ_HAVE_ICU
struct icu_chain *icu_chain;
#endif
struct icu_chain *icu_chain;
#endif
- WRBUF simple_buf;
+ WRBUF input_str;
+ WRBUF print_str;
size_t simple_off;
struct zebra_map *next;
};
size_t simple_off;
struct zebra_map *next;
};
@@
-86,14
+87,15
@@
void zebra_maps_close(zebra_maps_t zms)
{
if (zm->maptab)
chrmaptab_destroy(zm->maptab);
{
if (zm->maptab)
chrmaptab_destroy(zm->maptab);
-#if HAVE_ICU
+#if YAZ_HAVE_ICU
if (zm->icu_chain)
icu_chain_destroy(zm->icu_chain);
#endif
#if YAZ_HAVE_XML2
xmlFreeDoc(zm->doc);
#endif
if (zm->icu_chain)
icu_chain_destroy(zm->icu_chain);
#endif
#if YAZ_HAVE_XML2
xmlFreeDoc(zm->doc);
#endif
- wrbuf_destroy(zm->simple_buf);
+ wrbuf_destroy(zm->input_str);
+ wrbuf_destroy(zm->print_str);
zm = zm->next;
}
wrbuf_destroy(zms->wrbuf_1);
zm = zm->next;
}
wrbuf_destroy(zms->wrbuf_1);
@@
-110,7
+112,7
@@
zebra_map_t zebra_add_map(zebra_maps_t zms, const char *index_type,
zm->id = nmem_strdup(zms->nmem, index_type);
zm->maptab_name = 0;
zm->use_chain = 0;
zm->id = nmem_strdup(zms->nmem, index_type);
zm->maptab_name = 0;
zm->use_chain = 0;
- zm->locale = 0;
+ zm->debug = 0;
zm->maptab = 0;
zm->type = map_type;
zm->completeness = 0;
zm->maptab = 0;
zm->type = map_type;
zm->completeness = 0;
@@
-124,13
+126,14
@@
zebra_map_t zebra_add_map(zebra_maps_t zms, const char *index_type,
zms->map_list = zm;
zms->last_map = zm;
zm->next = 0;
zms->map_list = zm;
zms->last_map = zm;
zm->next = 0;
-#if HAVE_ICU
+#if YAZ_HAVE_ICU
zm->icu_chain = 0;
#endif
#if YAZ_HAVE_XML2
zm->doc = 0;
#endif
zm->icu_chain = 0;
#endif
#if YAZ_HAVE_XML2
zm->doc = 0;
#endif
- zm->simple_buf = wrbuf_alloc();
+ zm->input_str = wrbuf_alloc();
+ zm->print_str = wrbuf_alloc();
return zm;
}
return zm;
}
@@
-218,24
+221,16
@@
static int parse_command(zebra_maps_t zms, int argc, char **argv,
return -1;
}
}
return -1;
}
}
- else if (!yaz_matchstr(argv[0], "locale"))
- {
- zm->locale = nmem_strdup(zms->nmem, argv[1]);
- }
else if (!yaz_matchstr(argv[0], "simplechain"))
{
zm->use_chain = 1;
else if (!yaz_matchstr(argv[0], "simplechain"))
{
zm->use_chain = 1;
+#if YAZ_HAVE_ICU
zm->icu_chain = 0;
zm->icu_chain = 0;
+#endif
}
else if (!yaz_matchstr(argv[0], "icuchain"))
{
#if YAZ_HAVE_XML2
}
else if (!yaz_matchstr(argv[0], "icuchain"))
{
#if YAZ_HAVE_XML2
- if (!zm->locale)
- {
- yaz_log(YLOG_WARN, "%s:%d: locale required before icuchain",
- fname, lineno);
- return -1;
- }
zm->doc = xmlParseFile(argv[1]);
if (!zm->doc)
{
zm->doc = xmlParseFile(argv[1]);
if (!zm->doc)
{
@@
-245,11
+240,11
@@
static int parse_command(zebra_maps_t zms, int argc, char **argv,
}
else
{
}
else
{
-#if HAVE_ICU
+#if YAZ_HAVE_ICU
UErrorCode status;
xmlNode *xml_node = xmlDocGetRootElement(zm->doc);
zm->icu_chain =
UErrorCode status;
xmlNode *xml_node = xmlDocGetRootElement(zm->doc);
zm->icu_chain =
- icu_chain_xml_config(xml_node, zm->locale,
+ icu_chain_xml_config(xml_node,
/* not sure about sort for this function yet.. */
#if 1
1,
/* not sure about sort for this function yet.. */
#if 1
1,
@@
-275,6
+270,10
@@
static int parse_command(zebra_maps_t zms, int argc, char **argv,
return -1;
#endif
}
return -1;
#endif
}
+ else if (!yaz_matchstr(argv[0], "debug") && argc == 2)
+ {
+ zm->debug = atoi(argv[1]);
+ }
else
{
yaz_log(YLOG_WARN, "%s:%d: Unrecognized directive '%s'",
else
{
yaz_log(YLOG_WARN, "%s:%d: Unrecognized directive '%s'",
@@
-615,8
+614,8
@@
WRBUF zebra_replace(zebra_map_t zm, const char *ex_list,
static int tokenize_simple(zebra_map_t zm,
const char **result_buf, size_t *result_len)
{
static int tokenize_simple(zebra_map_t zm,
const char **result_buf, size_t *result_len)
{
- char *buf = wrbuf_buf(zm->simple_buf);
- size_t len = wrbuf_len(zm->simple_buf);
+ char *buf = wrbuf_buf(zm->input_str);
+ size_t len = wrbuf_len(zm->input_str);
size_t i = zm->simple_off;
size_t start;
size_t i = zm->simple_off;
size_t start;
@@
-648,11
+647,12
@@
int zebra_map_tokenize(zebra_map_t zm,
if (buf)
{
if (buf)
{
- wrbuf_rewind(zm->simple_buf);
- wrbuf_write(zm->simple_buf, buf, len);
+ wrbuf_rewind(zm->input_str);
+ wrbuf_write(zm->input_str, buf, len);
zm->simple_off = 0;
}
zm->simple_off = 0;
}
+#if YAZ_HAVE_ICU
if (!zm->icu_chain)
return tokenize_simple(zm, result_buf, result_len);
else
if (!zm->icu_chain)
return tokenize_simple(zm, result_buf, result_len);
else
@@
-660,30
+660,49
@@
int zebra_map_tokenize(zebra_map_t zm,
UErrorCode status;
if (buf)
{
UErrorCode status;
if (buf)
{
- yaz_log(YLOG_LOG, "assicn_cstr %s", wrbuf_cstr(zm->simple_buf));
+ if (zm->debug)
+ {
+ wrbuf_rewind(zm->print_str);
+ wrbuf_write_escaped(zm->print_str, wrbuf_buf(zm->input_str),
+ wrbuf_len(zm->input_str));
+
+ yaz_log(YLOG_LOG, "input %s",
+ wrbuf_cstr(zm->print_str));
+ }
icu_chain_assign_cstr(zm->icu_chain,
icu_chain_assign_cstr(zm->icu_chain,
- wrbuf_cstr(zm->simple_buf),
+ wrbuf_cstr(zm->input_str),
&status);
assert(U_SUCCESS(status));
}
while (icu_chain_next_token(zm->icu_chain, &status))
{
assert(U_SUCCESS(status));
&status);
assert(U_SUCCESS(status));
}
while (icu_chain_next_token(zm->icu_chain, &status))
{
assert(U_SUCCESS(status));
- *result_buf = icu_chain_token_norm(zm->icu_chain);
+ *result_buf = icu_chain_token_sortkey(zm->icu_chain);
assert(*result_buf);
assert(*result_buf);
- yaz_log(YLOG_LOG, "got result %s", *result_buf);
+
*result_len = strlen(*result_buf);
*result_len = strlen(*result_buf);
+
+ if (zm->debug)
+ {
+ wrbuf_rewind(zm->print_str);
+ wrbuf_write_escaped(zm->print_str, *result_buf, *result_len);
+ yaz_log(YLOG_LOG, "output %s", wrbuf_cstr(zm->print_str));
+ }
+
if (**result_buf != '\0')
return 1;
}
assert(U_SUCCESS(status));
}
return 0;
if (**result_buf != '\0')
return 1;
}
assert(U_SUCCESS(status));
}
return 0;
+#else
+ return tokenize_simple(zm, result_buf, result_len);
+#endif
}
int zebra_maps_is_icu(zebra_map_t zm)
{
}
int zebra_maps_is_icu(zebra_map_t zm)
{
-#if HAVE_ICU
+#if YAZ_HAVE_ICU
return zm->use_chain;
#else
return 0;
return zm->use_chain;
#else
return 0;