1 /* This file is part of the Zebra server.
2 Copyright (C) 1994-2011 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
29 #include <yaz/yaz-util.h>
/* Kinds of map section. One of these is passed as the last argument of
 * zebra_add_map() and is what zm->type is compared against elsewhere in
 * this file. */
36 #define ZEBRA_MAP_TYPE_SORT 1
37 #define ZEBRA_MAP_TYPE_INDEX 2
38 #define ZEBRA_MAP_TYPE_STATICRANK 3
/* NOTE(review): no use of ZEBRA_REPLACE_ANY is visible in this excerpt. */
40 #define ZEBRA_REPLACE_ANY 300
/* NOTE(review): the enclosing struct headers are not visible in this
 * excerpt; the owner of each field below is inferred from how the field
 * is accessed (zm->... for a single map, zms->... for the collection). */
/* Per-map: name of the character map table; tested against NULL and "@"
 * in zebra_charmap_get() before a table is created. */
57 const char *maptab_name;
/* Per-map: back-pointer to the owning collection (set in zebra_add_map). */
58 zebra_maps_t zebra_maps;
/* Per-map: ICU chain used by the tokenizer; destroyed in zebra_maps_close. */
63 struct icu_chain *icu_chain;
/* Per-map: link to the next map in the collection's list. */
68 struct zebra_map *next;
/* Collection: two-slot result array; zebra_maps_open points slot 0 at
 * temp_map_str and sets slot 1 to NULL. */
76 const char *temp_map_ptr[2];
/* Releases what a zebra_maps_t owns. Starting at zms->map_list, the
 * visible per-map cleanup destroys the character map table, the ICU
 * chain and the two per-map WRBUFs (input_str, print_str); afterwards the
 * shared wrbuf_1 and the NMEM pool are destroyed.
 * NOTE(review): the list-walking loop, any guards around the per-map
 * destroy calls, and the release of zms itself are not visible in this
 * excerpt. */
83 void zebra_maps_close(zebra_maps_t zms)
85 struct zebra_map *zm = zms->map_list;
89 chrmaptab_destroy(zm->maptab);
92 icu_chain_destroy(zm->icu_chain);
97 wrbuf_destroy(zm->input_str);
98 wrbuf_destroy(zm->print_str);
101 wrbuf_destroy(zms->wrbuf_1);
/* Map structs and their strings are allocated from this pool (see
 * zebra_add_map), so destroying it releases them as well. */
102 nmem_destroy(zms->nmem);
/* Creates a new map entry identified by index_type and links it into
 * zms. The entry and its id string are allocated from zms->nmem; the
 * completeness, alwaysmatches and first_in_field flags start at 0, and
 * two WRBUFs (input_str, print_str) are allocated for the tokenizer.
 * Callers in this file pass a ZEBRA_MAP_TYPE_* value as the last argument.
 * NOTE(review): the rest of the parameter list, the remaining field
 * initialisation, the empty-list case of the linking code and the return
 * statement are not visible in this excerpt. */
106 zebra_map_t zebra_add_map(zebra_maps_t zms, const char *index_type,
109 zebra_map_t zm = (zebra_map_t) nmem_malloc(zms->nmem, sizeof(*zm));
111 zm->zebra_maps = zms;
112 zm->id = nmem_strdup(zms->nmem, index_type);
118 zm->completeness = 0;
120 zm->alwaysmatches = 0;
121 zm->first_in_field = 0;
/* Append after the current tail of the list. */
124 zms->last_map->next = zm;
135 zm->input_str = wrbuf_alloc();
136 zm->print_str = wrbuf_alloc();
/* Handles one configuration directive, already split into argc/argv.
 * argv[0] is the directive name and argv[1] its argument; fname and
 * lineno are only used to prefix log messages.
 *
 * "index", "sort" and "staticrank" open a new section by adding a map;
 * the remaining directives (charmap, completeness, position,
 * alwaysmatches, firstinfield, entrysize, simplechain, icuchain, debug)
 * modify zm, which starts out as the most recently added map.
 *
 * NOTE(review): return statements, several conditions/else lines and any
 * preprocessor guards around the ICU/XML code are not visible in this
 * excerpt, so the return-value convention is not documented here. */
140 static int parse_command(zebra_maps_t zms, int argc, char **argv,
141 const char *fname, int lineno)
/* Directives other than the section openers apply to the last map added. */
143 zebra_map_t zm = zms->last_map;
146 yaz_log(YLOG_WARN, "%s:%d: Missing arguments for '%s'",
147 fname, lineno, argv[0]);
152 yaz_log(YLOG_WARN, "%s:%d: Too many arguments for '%s'",
153 fname, lineno, argv[0]);
/* Section openers: each adds a map named argv[1] of the matching type. */
156 if (!yaz_matchstr(argv[0], "index"))
158 zm = zebra_add_map(zms, argv[1], ZEBRA_MAP_TYPE_INDEX);
161 else if (!yaz_matchstr(argv[0], "sort"))
163 zm = zebra_add_map(zms, argv[1], ZEBRA_MAP_TYPE_SORT);
/* Initial entry size for a sort section; "entrysize" can replace it. */
164 zm->u.sort.entry_size = 80;
166 else if (!yaz_matchstr(argv[0], "staticrank"))
168 zm = zebra_add_map(zms, argv[1], ZEBRA_MAP_TYPE_STATICRANK);
/* Staticrank sections start with completeness switched on. */
169 zm->completeness = 1;
173 yaz_log(YLOG_WARN, "%s:%d: Missing sort/index before '%s'",
174 fname, lineno, argv[0]);
/* charmap: remember the table name (copied into the NMEM pool); it is
 * rejected for staticrank sections. */
177 else if (!yaz_matchstr(argv[0], "charmap") && argc == 2)
179 if (zm->type != ZEBRA_MAP_TYPE_STATICRANK)
180 zm->maptab_name = nmem_strdup(zms->nmem, argv[1]);
183 yaz_log(YLOG_WARN|YLOG_FATAL, "%s:%d: charmap for "
184 "staticrank is invalid", fname, lineno);
185 yaz_log(YLOG_LOG, "Type is %d", zm->type);
/* Numeric flags are converted with atoi(), so non-numeric text yields 0. */
189 else if (!yaz_matchstr(argv[0], "completeness") && argc == 2)
191 zm->completeness = atoi(argv[1]);
193 else if (!yaz_matchstr(argv[0], "position") && argc == 2)
195 zm->positioned = atoi(argv[1]);
/* alwaysmatches: like charmap, not accepted for staticrank sections. */
197 else if (!yaz_matchstr(argv[0], "alwaysmatches") && argc == 2)
199 if (zm->type != ZEBRA_MAP_TYPE_STATICRANK)
200 zm->alwaysmatches = atoi(argv[1]);
203 yaz_log(YLOG_WARN|YLOG_FATAL, "%s:%d: alwaysmatches for "
204 "staticrank is invalid", fname, lineno);
208 else if (!yaz_matchstr(argv[0], "firstinfield") && argc == 2)
210 zm->first_in_field = atoi(argv[1]);
/* entrysize: only meaningful for sort sections. */
212 else if (!yaz_matchstr(argv[0], "entrysize") && argc == 2)
214 if (zm->type == ZEBRA_MAP_TYPE_SORT)
215 zm->u.sort.entry_size = atoi(argv[1]);
219 "%s:%d: entrysize only valid in sort section",
/* NOTE(review): the body of the simplechain branch is not visible here. */
224 else if (!yaz_matchstr(argv[0], "simplechain"))
/* icuchain: resolve argv[1] against tabpath/tabroot, parse it as XML and
 * configure an ICU chain from the document's root element. A separate
 * warning is logged for each step that can fail. */
231 else if (!yaz_matchstr(argv[0], "icuchain"))
233 char full_path[1024];
234 if (!yaz_filepath_resolve(argv[1], zms->tabpath, zms->tabroot,
237 yaz_log(YLOG_WARN, "%s:%d: Could not locate icuchain config '%s'",
238 fname, lineno, argv[1]);
242 zm->doc = xmlParseFile(full_path);
245 yaz_log(YLOG_WARN, "%s:%d: Could not load icuchain config '%s'",
246 fname, lineno, argv[1]);
253 xmlNode *xml_node = xmlDocGetRootElement(zm->doc);
255 icu_chain_xml_config(xml_node,
256 /* not sure about sort for this function yet.. */
260 zm->type == ZEBRA_MAP_TYPE_SORT,
265 yaz_log(YLOG_WARN, "%s:%d: Failed to load ICU chain %s",
266 fname, lineno, argv[1]);
270 yaz_log(YLOG_WARN, "%s:%d: ICU support unavailable",
276 yaz_log(YLOG_WARN, "%s:%d: XML support unavailable",
281 else if (!yaz_matchstr(argv[0], "debug") && argc == 2)
283 zm->debug = atoi(argv[1]);
/* Anything else is reported as an unknown directive. */
287 yaz_log(YLOG_WARN, "%s:%d: Unrecognized directive '%s'",
288 fname, lineno, argv[0]);
/* Reads map definitions from the configuration file fname. The file is
 * opened through yaz_fopen() using zms->tabpath and zms->tabroot; each
 * line returned by readconf_line() is passed to parse_command(), and
 * zms->no_files_read is incremented.
 * NOTE(review): local declarations, the handling of parse_command's
 * result, closing of the file and the returned ZEBRA_RES values are not
 * visible in this excerpt. */
294 ZEBRA_RES zebra_maps_read_file(zebra_maps_t zms, const char *fname)
303 if (!(f = yaz_fopen(zms->tabpath, fname, "r", zms->tabroot)))
/* Open failure is logged with errno at fatal level. */
305 yaz_log(YLOG_ERRNO|YLOG_FATAL, "%s", fname);
/* 512 and 10 are the limits handed to readconf_line for the line buffer
 * and the argv array. */
308 while ((argc = readconf_line(f, &lineno, line, 512, argv, 10)))
310 int r = parse_command(zms, argc, argv, fname, lineno);
319 (zms->no_files_read)++;
/* Allocates and initialises an empty map collection. profile_path and
 * base_path are copied into the collection's own NMEM pool as tabpath
 * and tabroot; a NULL profile_path leaves tabpath as 0.
 * NOTE(review): no use of res, any guard around the tabroot copy, the
 * initialisation of the map list and the return statement are visible in
 * this excerpt. */
323 zebra_maps_t zebra_maps_open(Res res, const char *base_path,
324 const char *profile_path)
326 zebra_maps_t zms = (zebra_maps_t) xmalloc(sizeof(*zms));
328 zms->nmem = nmem_create();
329 zms->tabpath = profile_path ? nmem_strdup(zms->nmem, profile_path) : 0;
332 zms->tabroot = nmem_strdup(zms->nmem, base_path);
/* temp_map_str / temp_map_ptr form a NULL-terminated one-string result
 * used by zebra_maps_input and zebra_maps_search. */
336 zms->temp_map_str[0] = '\0';
337 zms->temp_map_str[1] = '\0';
339 zms->temp_map_ptr[0] = zms->temp_map_str;
340 zms->temp_map_ptr[1] = NULL;
342 zms->wrbuf_1 = wrbuf_alloc();
344 zms->no_files_read = 0;
/* Adds a sort map with id "s" and an entry size of 80, the same entry
 * size parse_command gives a "sort" section. */
348 void zebra_maps_define_default_sort(zebra_maps_t zms)
350 zebra_map_t zm = zebra_add_map(zms, "s", ZEBRA_MAP_TYPE_SORT);
351 zm->u.sort.entry_size = 80;
/* Looks for the map whose id equals id (exact strcmp match) by walking
 * the list from zms->map_list.
 * NOTE(review): the return statements are not visible in this excerpt;
 * zebra_map_get_or_add uses the result as a possibly-missing map. */
354 zebra_map_t zebra_map_get(zebra_maps_t zms, const char *id)
357 for (zm = zms->map_list; zm; zm = zm->next)
358 if (!strcmp(zm->id, id))
/* Looks up the map with the given id and, in the add path, creates an
 * index-type map for it with character map "@" and completeness 0.
 * NOTE(review): the condition guarding the add path and the return
 * statement are not visible in this excerpt. */
363 zebra_map_t zebra_map_get_or_add(zebra_maps_t zms, const char *id)
365 struct zebra_map *zm = zebra_map_get(zms, id);
368 zm = zebra_add_map(zms, id, ZEBRA_MAP_TYPE_INDEX);
370 /* no reason to warn if no maps are read from file */
371 if (zms->no_files_read)
372 yaz_log(YLOG_WARN, "Unknown register type: %s", id);
374 zm->maptab_name = nmem_strdup(zms->nmem, "@");
375 zm->completeness = 0;
/* Returns the character map table for zm, creating it with
 * chrmaptab_create() from the collection's tabpath/tabroot when needed
 * and storing it in zm->maptab. A failed create is logged as a warning,
 * a successful read at debug level.
 * NOTE(review): the caching test, what happens when maptab_name is NULL
 * or "@", and the return statements are not visible in this excerpt. */
381 chrmaptab zebra_charmap_get(zebra_map_t zm)
/* Special-cases a map with no table name or the name "@". */
385 if (!zm->maptab_name || !yaz_matchstr(zm->maptab_name, "@"))
387 if (!(zm->maptab = chrmaptab_create(zm->zebra_maps->tabpath,
389 zm->zebra_maps->tabroot)))
390 yaz_log(YLOG_WARN, "Failed to read character table %s",
393 yaz_log(YLOG_DEBUG, "Read character table %s", zm->maptab_name);
/* Maps input text through zm's character map table. With a table the
 * result comes from chr_map_input(); in the other path the first byte of
 * *from is stored in the collection's temp_map_str and temp_map_ptr is
 * returned. That fallback storage belongs to the zebra_maps_t, so a later
 * call on any map of the same collection overwrites it.
 * NOTE(review): the condition choosing between the two paths and any
 * update of *from in the fallback path are not visible in this excerpt. */
398 const char **zebra_maps_input(zebra_map_t zm,
399 const char **from, int len, int first)
401 chrmaptab maptab = zebra_charmap_get(zm);
403 return chr_map_input(maptab, from, len, first);
405 zm->zebra_maps->temp_map_str[0] = **from;
408 return zm->zebra_maps->temp_map_ptr;
/* Search-time counterpart of zebra_maps_input. With a character map
 * table it calls chr_map_q_input() and chr_map_input() (both with 0 as
 * the last argument); the fallback path stores the first byte of *from in
 * the collection's shared temp_map_str and returns temp_map_ptr.
 * NOTE(review): how the two map calls are combined, how *q_map_match is
 * set, and the conditions between the paths are not visible in this
 * excerpt. */
411 const char **zebra_maps_search(zebra_map_t zm,
412 const char **from, int len, int *q_map_match)
417 maptab = zebra_charmap_get(zm);
421 map = chr_map_q_input(maptab, from, len, 0);
427 map = chr_map_input(maptab, from, len, 0);
431 zm->zebra_maps->temp_map_str[0] = **from;
434 return zm->zebra_maps->temp_map_ptr;
/* Maps text back through zm's character map table via
 * chr_map_output(maptab, from, 1).
 * NOTE(review): the remaining parameter(s) and the path taken when no
 * table is available are not visible in this excerpt. */
437 const char *zebra_maps_output(zebra_map_t zm,
440 chrmaptab maptab = zebra_charmap_get(zm);
443 return chr_map_output(maptab, from, 1);
447 /* ------------------------------------ */
/* Returns zm's completeness setting.
 * NOTE(review): lines between the signature and the return are not
 * visible in this excerpt. */
449 int zebra_maps_is_complete(zebra_map_t zm)
452 return zm->completeness;
/* Returns zm's positioned setting (the "position" directive value). */
456 int zebra_maps_is_positioned(zebra_map_t zm)
459 return zm->positioned;
/* Returns non-zero when zm is of type ZEBRA_MAP_TYPE_INDEX. */
463 int zebra_maps_is_index(zebra_map_t zm)
466 return zm->type == ZEBRA_MAP_TYPE_INDEX;
/* Returns non-zero when zm is of type ZEBRA_MAP_TYPE_STATICRANK. */
470 int zebra_maps_is_staticrank(zebra_map_t zm)
473 return zm->type == ZEBRA_MAP_TYPE_STATICRANK;
/* Returns non-zero when zm is of type ZEBRA_MAP_TYPE_SORT. */
477 int zebra_maps_is_sort(zebra_map_t zm)
480 return zm->type == ZEBRA_MAP_TYPE_SORT;
/* Returns zm's alwaysmatches setting. */
484 int zebra_maps_is_alwaysmatches(zebra_map_t zm)
487 return zm->alwaysmatches;
/* Returns zm's first_in_field setting (the "firstinfield" directive value). */
491 int zebra_maps_is_first_in_field(zebra_map_t zm)
494 return zm->first_in_field;
/* Examines the attribute list of a sort specification: attribute type 1
 * is read as the use attribute and type 4 as the structure attribute.
 * The function's return value is the use attribute as found by
 * attr_find().
 * NOTE(review): the remaining parameter(s) and the action taken for
 * structure value 109 are not visible in this excerpt. */
498 int zebra_maps_sort(zebra_maps_t zms, Z_SortAttributes *sortAttributes,
504 attr_init_AttrList(&use, sortAttributes->list, 1);
505 attr_init_AttrList(&structure, sortAttributes->list, 4);
508 structure_value = attr_find(&structure, 0);
/* 109 is the structure value labelled "numeric string" in zebra_maps_attr. */
509 if (structure_value == 109)
511 return attr_find(&use, NULL);
/* Derives search settings from the attributes of a term (zapt) and
 * reports them through the output parameters:
 *   *index_type   - replaced by the structure attribute's string value
 *                   when that string is non-empty
 *   *search_type  - "phrase" by default, otherwise chosen from the
 *                   structure attribute value in the switch below
 *   rank_type     - "void" by default; a "rank,w=..,u=.." string is
 *                   written in the relation-102 path
 *   *sort_flag    - 1 when the sort-relation attribute value is > 0
 *   *complete_flag - NOTE(review): its assignment is not visible here
 * NOTE(review): rank_type is written with strcpy/sprintf and no size is
 * passed in, so the caller's buffer must be large enough for
 * "rank,w=%d,u=%d" with two ints - confirm against callers.
 * NOTE(review): break statements, several case labels, some declarations
 * and the return statement are not visible in this excerpt. */
514 int zebra_maps_attr(zebra_maps_t zms, Z_AttributesPlusTerm *zapt,
515 const char **index_type, char **search_type, char *rank_type,
516 int *complete_flag, int *sort_flag)
518 AttrType completeness;
521 AttrType sort_relation;
524 int completeness_value;
526 const char *structure_str = 0;
528 int sort_relation_value;
/* Attribute type numbers: 4 structure, 6 completeness, 2 relation,
 * 7 sort relation, 9 weight, 1 use. */
532 attr_init_APT(&structure, zapt, 4);
533 attr_init_APT(&completeness, zapt, 6);
534 attr_init_APT(&relation, zapt, 2);
535 attr_init_APT(&sort_relation, zapt, 7);
536 attr_init_APT(&weight, zapt, 9);
537 attr_init_APT(&use, zapt, 1);
539 completeness_value = attr_find(&completeness, NULL);
/* attr_find_ex also hands back the attribute's string form, if any. */
540 structure_value = attr_find_ex(&structure, NULL, &structure_str);
541 relation_value = attr_find(&relation, NULL);
542 sort_relation_value = attr_find(&sort_relation, NULL);
543 weight_value = attr_find(&weight, NULL);
544 use_value = attr_find(&use, NULL);
546 if (completeness_value == 2 || completeness_value == 3)
552 *sort_flag =(sort_relation_value > 0) ? 1 : 0;
/* Defaults, possibly overridden below. */
553 *search_type = "phrase";
554 strcpy(rank_type, "void");
555 if (relation_value == 102)
/* A weight value of -1 is special-cased before the rank string is built
 * (the replacement value is not visible in this excerpt). */
557 if (weight_value == -1)
559 sprintf(rank_type, "rank,w=%d,u=%d", weight_value, use_value);
565 switch (structure_value)
567 case 6: /* word list */
568 *search_type = "and-list";
570 case 105: /* free-form-text */
571 *search_type = "or-list";
573 case 106: /* document-text */
574 *search_type = "or-list";
579 case 108: /* string */
580 *search_type = "phrase";
582 case 107: /* local-number */
583 *search_type = "local";
586 case 109: /* numeric string */
588 *search_type = "numeric";
592 *search_type = "phrase";
596 *search_type = "phrase";
600 *search_type = "phrase";
604 *search_type = "phrase";
/* A non-empty string-valued structure attribute names the index type. */
607 if (structure_str && *structure_str)
608 *index_type = structure_str;
/* Copies input_len bytes of input_str into the collection's shared
 * wrbuf_1 (after rewinding it) and returns that WRBUF. The buffer belongs
 * to the zebra_maps_t and is reused on every call, so the caller must not
 * destroy it and should consume the result before calling again.
 * NOTE(review): ex_list is not referenced in the visible lines. */
618 WRBUF zebra_replace(zebra_map_t zm, const char *ex_list,
619 const char *input_str, int input_len)
621 wrbuf_rewind(zm->zebra_maps->wrbuf_1);
622 wrbuf_write(zm->zebra_maps->wrbuf_1, input_str, input_len);
623 return zm->zebra_maps->wrbuf_1;
/* Characters tokenize_simple() treats as token separators. */
626 #define SE_CHARS ";,.()-/?<> \r\n\t"
/* Extracts the next token from zm->input_str, scanning from
 * zm->simple_off. Leading SE_CHARS separators are skipped, then bytes are
 * consumed up to the next separator or the end of the buffer. The token
 * is reported as a pointer into the WRBUF's own storage plus a length; it
 * is not NUL-terminated and not copied. Bytes in the range 33..126 are
 * lower-cased in place, so the input buffer itself is modified.
 * NOTE(review): the assignment of start, the loop bodies' increments, the
 * update of zm->simple_off and the return values are not visible in this
 * excerpt. */
628 static int tokenize_simple(zebra_map_t zm,
629 const char **result_buf, size_t *result_len)
631 char *buf = wrbuf_buf(zm->input_str);
632 size_t len = wrbuf_len(zm->input_str);
633 size_t i = zm->simple_off;
/* Skip separators in front of the token. */
636 while (i < len && strchr(SE_CHARS, buf[i]))
/* Consume the token itself. */
639 while (i < len && !strchr(SE_CHARS, buf[i]))
/* Only printable ASCII is case-folded; other bytes pass through as-is. */
641 if (buf[i] > 32 && buf[i] < 127)
642 buf[i] = tolower(buf[i]);
649 *result_buf = buf + start;
650 *result_len = i - start;
/* Produces the next token of the text set up by
 * zebra_map_tokenize_start(). Requires zm->use_chain (asserted). Two
 * sources are visible: tokenize_simple(), and the ICU chain, where each
 * token's sort key becomes *result_buf and its display form
 * *display_buf, with lengths taken by strlen(). The ICU loop tests
 * whether the sort key is non-empty before finishing.
 * NOTE(review): the conditions selecting between the simple and ICU
 * paths, the guards around the display and debug-log lines, any
 * preprocessor guards and the return values are not visible in this
 * excerpt. */
657 int zebra_map_tokenize_next(zebra_map_t zm,
658 const char **result_buf, size_t *result_len,
659 const char **display_buf, size_t *display_len)
661 assert(zm->use_chain);
665 return tokenize_simple(zm, result_buf, result_len);
669 while (icu_chain_next_token(zm->icu_chain, &status))
671 if (!U_SUCCESS(status))
673 *result_buf = icu_chain_token_sortkey(zm->icu_chain);
676 *result_len = strlen(*result_buf);
680 *display_buf = icu_chain_token_display(zm->icu_chain);
682 *display_len = strlen(*display_buf);
/* Log the token in escaped form via the per-map print_str scratch buffer. */
686 wrbuf_rewind(zm->print_str);
687 wrbuf_write_escaped(zm->print_str, *result_buf, *result_len);
688 yaz_log(YLOG_LOG, "output %s", wrbuf_cstr(zm->print_str));
691 if (**result_buf != '\0')
697 return tokenize_simple(zm, result_buf, result_len);
/* Starts tokenization of len bytes at buf. Requires zm->use_chain
 * (asserted). The text is copied into zm->input_str, replacing its
 * previous contents, so buf need not stay valid afterwards. In the ICU
 * path the copy is assigned to the chain with icu_chain_assign_cstr(),
 * and a failed assignment with an error status is logged as
 * "bad encoding for input".
 * NOTE(review): the reset of the simple tokenizer offset, the guards
 * around the debug logging and ICU code, and the return values are not
 * visible in this excerpt. */
701 int zebra_map_tokenize_start(zebra_map_t zm,
702 const char *buf, size_t len)
707 assert(zm->use_chain);
709 wrbuf_rewind(zm->input_str);
710 wrbuf_write(zm->input_str, buf, len);
/* Log the input in escaped form via the print_str scratch buffer. */
718 wrbuf_rewind(zm->print_str);
719 wrbuf_write_escaped(zm->print_str, wrbuf_buf(zm->input_str),
720 wrbuf_len(zm->input_str));
722 yaz_log(YLOG_LOG, "input %s",
723 wrbuf_cstr(zm->print_str));
725 ret = icu_chain_assign_cstr(zm->icu_chain,
726 wrbuf_cstr(zm->input_str), &status);
727 if (!ret && !U_SUCCESS(status))
731 yaz_log(YLOG_WARN, "bad encoding for input");
/* Returns zm->use_chain, the flag the tokenizer entry points assert on.
 * NOTE(review): despite the name, the visible code does not show whether
 * the flag distinguishes ICU chains from the simple chain. */
740 int zebra_maps_is_icu(zebra_map_t zm)
744 return zm->use_chain;
754 * c-file-style: "Stroustrup"
755 * indent-tabs-mode: nil
757 * vim: shiftwidth=4 tabstop=8 expandtab