Happy new year
[idzebra-moved-to-github.git] / index / extract.c
index cbd4973..5071fd7 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the Zebra server.
-   Copyright (C) 1994-2011 Index Data
+   Copyright (C) Index Data
 
 Zebra is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -144,11 +144,71 @@ struct snip_rec_info {
     zebra_snippets *snippets;
 };
 
+static int parse_complete_field(RecWord *p, zebra_map_t zm,
+                                char *buf) /* maps the term in p->term_buf through zm into buf; returns bytes stored */
+{ /* at most IT_MAX_WORD bytes are written; buf is not NUL-terminated here, the return value is its length */
+    const char *b = p->term_buf; /* read cursor, handed to zebra_maps_input by address */
+    const char **map = 0; /* most recent result from zebra_maps_input; 0 once input is used up */
+    int i = 0, remain = p->term_len; /* i: bytes written to buf so far; remain: input bytes not yet consumed */
+
+    if (remain > 0)
+       map = zebra_maps_input(zm, &b, remain, 1); /* fetch first token; last argument 1 marks the first position */
+    while (remain > 0 && i < IT_MAX_WORD) /* one pass per word: skip spaces, then collect non-space tokens */
+    {
+       while (map && *map && **map == *CHR_SPACE) /* skip tokens whose mapping begins with the CHR_SPACE character */
+       {
+           remain = p->term_len - (b - p->term_buf); /* recomputed from b; presumably zebra_maps_input advances b - confirm */
+
+           if (remain > 0)
+           {
+               int first = i ? 0 : 1;  /* first position */
+               map = zebra_maps_input(zm, &b, remain, first);
+           }
+           else
+               map = 0; /* nothing left to read */
+       }
+       if (!map) /* input ended while skipping spaces, or zebra_maps_input returned 0 */
+           break;
+
+       if (i && i < IT_MAX_WORD) /* separate words with one CHR_SPACE byte, but never lead with one */
+           buf[i++] = *CHR_SPACE;
+       while (map && *map && **map != *CHR_SPACE) /* NOTE(review): if map != 0 but *map == 0 while i == 0, neither inner loop advances - confirm zebra_maps_input cannot return that */
+       {
+           const char *cp = *map; /* mapped string for the current token */
+
+           if (**map == *CHR_CUT)
+           {
+               i = 0; /* CHR_CUT: discard everything collected so far */
+           }
+           else
+           {
+               if (i >= IT_MAX_WORD) /* buf is full; leave the token loop */
+                   break;
+               while (i < IT_MAX_WORD && *cp) /* append the mapped string, truncating at IT_MAX_WORD */
+                   buf[i++] = *(cp++);
+           }
+           remain = p->term_len  - (b - p->term_buf); /* same recomputation as in the space-skipping loop */
+           if (remain > 0)
+           {
+               map = zebra_maps_input(zm, &b, remain, 0); /* next token; last argument 0 = not the first position */
+           }
+           else
+               map = 0; /* input exhausted; ends both this loop and the outer one */
+       }
+    }
+    return i; /* 0 means nothing was produced; callers shown in this diff return early in that case */
+}
 
 static void snippet_add_complete_field(RecWord *p, int ord,
                                        zebra_map_t zm)
 {
     struct snip_rec_info *h = p->extractCtrl->handle;
+    char buf[IT_MAX_WORD+1];
+    int i = parse_complete_field(p, zm, buf);
+
+    if (!i)
+        return;
+
     if (p->term_len && p->term_buf && zebra_maps_is_index(zm))
         zebra_snippets_appendn(h->snippets, p->seqno, 0, ord,
                                p->term_buf, p->term_len);
@@ -1276,7 +1336,6 @@ void zebra_it_key_str_dump(ZebraHandle zh, struct it_key *key,
                            const char *str, size_t slen, NMEM nmem, int level)
 {
     char keystr[200]; /* room for zints to print */
-    char *dst_term = 0;
     int ord = CAST_ZINT_TO_INT(key->mem[0]);
     const char *index_type;
     int i;
@@ -1285,8 +1344,6 @@ void zebra_it_key_str_dump(ZebraHandle zh, struct it_key *key,
     zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
                             0/* db */, &string_index);
     assert(index_type);
-    zebra_term_untrans_iconv(zh, nmem, index_type,
-                             &dst_term, str);
     *keystr = '\0';
     for (i = 0; i < key->len; i++)
     {
@@ -1315,11 +1372,23 @@ void zebra_it_key_str_dump(ZebraHandle zh, struct it_key *key,
         }
         yaz_log(level, "%s%s %s %s", keystr, index_type,
                 string_index, dst_buf);
-
     }
     else
-        yaz_log(level, "%s%s %s \"%s\"", keystr, index_type,
-                string_index, dst_term);
+    {
+        char *dst_term = 0;
+        zebra_term_untrans_iconv(zh, nmem, index_type, &dst_term, str);
+        if (dst_term)
+            yaz_log(level, "%s%s %s \"%s\"", keystr, index_type,
+                    string_index, dst_term);
+        else
+        {
+            WRBUF w = wrbuf_alloc();
+            wrbuf_write_escaped(w, str, strlen(str));
+            yaz_log(level, "%s%s %s %s", keystr, index_type,
+                    string_index, wrbuf_cstr(w));
+            wrbuf_destroy(w);
+        }
+    }
 }
 
 void extract_rec_keys_log(ZebraHandle zh, int is_insert,
@@ -1733,57 +1802,8 @@ static void extract_add_incomplete_field(RecWord *p, zebra_map_t zm)
 
 static void extract_add_complete_field(RecWord *p, zebra_map_t zm)
 {
-    const char *b = p->term_buf;
     char buf[IT_MAX_WORD+1];
-    const char **map = 0;
-    int i = 0, remain = p->term_len;
-
-    if (remain > 0)
-       map = zebra_maps_input(zm, &b, remain, 1);
-
-    while (remain > 0 && i < IT_MAX_WORD)
-    {
-       while (map && *map && **map == *CHR_SPACE)
-       {
-           remain = p->term_len - (b - p->term_buf);
-
-           if (remain > 0)
-           {
-               int first = i ? 0 : 1;  /* first position */
-               map = zebra_maps_input(zm, &b, remain, first);
-           }
-           else
-               map = 0;
-       }
-       if (!map)
-           break;
-
-       if (i && i < IT_MAX_WORD)
-           buf[i++] = *CHR_SPACE;
-       while (map && *map && **map != *CHR_SPACE)
-       {
-           const char *cp = *map;
-
-           if (**map == *CHR_CUT)
-           {
-               i = 0;
-           }
-           else
-           {
-               if (i >= IT_MAX_WORD)
-                   break;
-               while (i < IT_MAX_WORD && *cp)
-                   buf[i++] = *(cp++);
-           }
-           remain = p->term_len  - (b - p->term_buf);
-           if (remain > 0)
-           {
-               map = zebra_maps_input(zm, &b, remain, 0);
-           }
-           else
-               map = 0;
-       }
-    }
+    int i = parse_complete_field(p, zm, buf);
     if (!i)
        return;
     extract_add_string(p, zm, buf, i);