ac188a061008ef9f1bd4c6d948ab8734cf551141
[idzebra-moved-to-github.git] / index / extract.c
1 /* $Id: extract.c,v 1.261 2007-08-22 08:01:32 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <assert.h>
25 #include <ctype.h>
26 #ifdef WIN32
27 #include <io.h>
28 #endif
29 #if HAVE_UNISTD_H
30 #include <unistd.h>
31 #endif
32 #include <fcntl.h>
33
34 #include "index.h"
35 #include "orddict.h"
36 #include <direntz.h>
37 #include <charmap.h>
38
39 static int log_level_extract = 0;
40 static int log_level_details = 0;
41 static int log_level_initialized = 0;
42
/* 1 if we eliminate identical delete/insert keys */
/* eventually the 0-case code will be removed */
45 #define FLUSH2 1
46
47 void extract_flush_record_keys2(ZebraHandle zh, zint sysno,
48                                 zebra_rec_keys_t ins_keys,
49                                 zint ins_rank,
50                                 zebra_rec_keys_t del_keys,
51                                 zint del_rank);
52
53 static void zebra_init_log_level(void)
54 {
55     if (!log_level_initialized)
56     {
57         log_level_initialized = 1;
58
59         log_level_extract = yaz_log_module_level("extract");
60         log_level_details = yaz_log_module_level("indexdetails");
61     }
62 }
63
64 static void extract_flush_record_keys(ZebraHandle zh, zint sysno,
65                                       int cmd, zebra_rec_keys_t reckeys,
66                                       zint staticrank);
67 static void extract_flush_sort_keys(ZebraHandle zh, zint sysno,
68                                     int cmd, zebra_rec_keys_t skp);
69 static void extract_schema_add (struct recExtractCtrl *p, Odr_oid *oid);
70 static void extract_token_add (RecWord *p);
71
72 static void check_log_limit(ZebraHandle zh)
73 {
74     if (zh->records_processed + zh->records_skipped == zh->m_file_verbose_limit)
75     {
76         yaz_log(YLOG_LOG, "More than %d file log entries. Omitting rest",
77                 zh->m_file_verbose_limit);
78     }
79 }
80
81 static void logRecord (ZebraHandle zh)
82 {
83     check_log_limit(zh);
84     ++zh->records_processed;
85     if (!(zh->records_processed % 1000))
86     {
87         yaz_log(YLOG_LOG, "Records: "ZINT_FORMAT" i/u/d "
88                 ZINT_FORMAT"/"ZINT_FORMAT"/"ZINT_FORMAT, 
89                 zh->records_processed, zh->records_inserted, 
90                 zh->records_updated, zh->records_deleted);
91     }
92 }
93
94 static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl)
95 {
96     int i;
97     for (i = 0; i<256; i++)
98     {
99         if (zebra_maps_is_positioned(zh->reg->zebra_maps, i))
100             ctrl->seqno[i] = 1;
101         else
102             ctrl->seqno[i] = 0;
103     }
104     ctrl->flagShowRecords = !zh->m_flag_rw;
105 }
106
107
108 static void extract_add_index_string (RecWord *p, 
109                                       zinfo_index_category_t cat,
110                                       const char *str, int length);
111
112 static void extract_set_store_data_prepare(struct recExtractCtrl *p);
113
114 static void extract_init(struct recExtractCtrl *p, RecWord *w)
115 {
116     w->seqno = 1;
117     w->index_name = "any";
118     w->index_type = 'w';
119     w->extractCtrl = p;
120     w->record_id = 0;
121     w->section_id = 0;
122     w->segment = 0;
123 }
124
125 struct snip_rec_info {
126     ZebraHandle zh;
127     zebra_snippets *snippets;
128 };
129
130
131 static void snippet_add_complete_field(RecWord *p, int ord)
132 {
133     struct snip_rec_info *h = p->extractCtrl->handle;
134     ZebraHandle zh = h->zh;
135
136     const char *b = p->term_buf;
137     char buf[IT_MAX_WORD+1];
138     const char **map = 0;
139     int i = 0, remain = p->term_len;
140     const char *start = b;
141     const char *last = 0;
142
143     if (remain > 0)
144         map = zebra_maps_input (zh->reg->zebra_maps, p->index_type, &b, remain, 1);
145
146     while (remain > 0 && i < IT_MAX_WORD)
147     {
148         while (map && *map && **map == *CHR_SPACE)
149         {
150             remain = p->term_len - (b - p->term_buf);
151
152             if (i == 0)
153                 start = b;  /* set to first non-ws area */
154             if (remain > 0)
155             {
156                 int first = i ? 0 : 1;  /* first position */
157
158                 map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, 
159                                        &b, remain, first);
160             }
161             else
162                 map = 0;
163         }
164         if (!map)
165             break;
166
167         if (i && i < IT_MAX_WORD)
168             buf[i++] = *CHR_SPACE;
169         while (map && *map && **map != *CHR_SPACE)
170         {
171             const char *cp = *map;
172
173             if (**map == *CHR_CUT)
174             {
175                 i = 0;
176             }
177             else
178             {
179                 if (i >= IT_MAX_WORD)
180                     break;
181                 while (i < IT_MAX_WORD && *cp)
182                     buf[i++] = *(cp++);
183             }
184             last = b;
185             remain = p->term_len  - (b - p->term_buf);
186             if (remain > 0)
187             {
188                 map = zebra_maps_input (zh->reg->zebra_maps, p->index_type, &b,
189                                         remain, 0);
190             }
191             else
192                 map = 0;
193         }
194     }
195     if (!i)
196         return;
197     if (last && start != last)
198         zebra_snippets_appendn(h->snippets, p->seqno, 0, ord,
199                                start, last - start);
200 }
201
202 static void snippet_add_incomplete_field(RecWord *p, int ord)
203 {
204     struct snip_rec_info *h = p->extractCtrl->handle;
205     ZebraHandle zh = h->zh;
206     const char *b = p->term_buf;
207     int remain = p->term_len;
208     int first = 1;
209     const char **map = 0;
210     const char *start = b;
211     const char *last = b;
212
213     if (remain > 0)
214         map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0);
215
216     while (map)
217     {
218         char buf[IT_MAX_WORD+1];
219         int i, remain;
220
221         /* Skip spaces */
222         while (map && *map && **map == *CHR_SPACE)
223         {
224             remain = p->term_len - (b - p->term_buf);
225             last = b;
226             if (remain > 0)
227                 map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b,
228                                        remain, 0);
229             else
230                 map = 0;
231         }
232         if (!map)
233             break;
234         if (start != last)
235         {
236             zebra_snippets_appendn(h->snippets, p->seqno, 1, ord,
237                                    start, last - start);
238
239         }
240         start = last;
241
242         i = 0;
243         while (map && *map && **map != *CHR_SPACE)
244         {
245             const char *cp = *map;
246
247             while (i < IT_MAX_WORD && *cp)
248                 buf[i++] = *(cp++);
249             remain = p->term_len - (b - p->term_buf);
250             last = b;
251             if (remain > 0)
252                 map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0);
253             else
254                 map = 0;
255         }
256         if (!i)
257             return;
258
259         if (first)
260         {   
261             first = 0;
262             if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type))
263             {
264                 /* first in field marker */
265                 p->seqno++;
266             }
267         }
268         if (start != last)
269             zebra_snippets_appendn(h->snippets, p->seqno, 0, ord,
270                                    start, last - start);
271         start = last;
272         p->seqno++;
273     }
274
275 }
276
277 static void snippet_token_add(RecWord *p)
278 {
279     struct snip_rec_info *h = p->extractCtrl->handle;
280     ZebraHandle zh = h->zh;
281
282     if (zebra_maps_is_index(zh->reg->zebra_maps, p->index_type))
283     {
284         ZebraExplainInfo zei = zh->reg->zei;
285         int ch = zebraExplain_lookup_attr_str(
286             zei, zinfo_index_category_index, p->index_type, p->index_name);
287
288         if (zebra_maps_is_complete (h->zh->reg->zebra_maps, p->index_type))
289             snippet_add_complete_field (p, ch);
290         else
291             snippet_add_incomplete_field(p, ch);
292     }
293 }
294
295 static void snippet_schema_add(
296     struct recExtractCtrl *p, Odr_oid *oid)
297 {
298
299 }
300
301 void extract_snippet(ZebraHandle zh, zebra_snippets *sn,
302                      struct ZebraRecStream *stream,
303                      RecType rt, void *recTypeClientData)
304 {
305     struct recExtractCtrl extractCtrl;
306     struct snip_rec_info info;
307     int r;
308
309     extractCtrl.stream = stream;
310     extractCtrl.first_record = 1;
311     extractCtrl.init = extract_init;
312     extractCtrl.tokenAdd = snippet_token_add;
313     extractCtrl.schemaAdd = snippet_schema_add;
314     assert(zh->reg);
315     assert(zh->reg->dh);
316
317     extractCtrl.dh = zh->reg->dh;
318     
319     info.zh = zh;
320     info.snippets = sn;
321     extractCtrl.handle = &info;
322     extractCtrl.match_criteria[0] = '\0';
323     extractCtrl.staticrank = 0;
324     extractCtrl.action = action_insert;
325     
326     init_extractCtrl(zh, &extractCtrl);
327
328     r = (*rt->extract)(recTypeClientData, &extractCtrl);
329
330 }
331
332 static void searchRecordKey(ZebraHandle zh,
333                             zebra_rec_keys_t reckeys,
334                             const char *index_name,
335                             const char **ws, int ws_length)
336 {
337     int i;
338     int ch = -1;
339     zinfo_index_category_t cat = zinfo_index_category_index;
340
341     for (i = 0; i<ws_length; i++)
342         ws[i] = NULL;
343
344     if (ch < 0)
345         ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, '0', index_name);
346     if (ch < 0)
347         ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, 'p', index_name);
348     if (ch < 0)
349         ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, 'w', index_name);
350
351     if (ch < 0)
352         return ;
353
354     if (zebra_rec_keys_rewind(reckeys))
355     {
356         zint startSeq = -1;
357         const char *str;
358         size_t slen;
359         struct it_key key;
360         zint seqno;
361         while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
362         {
363             assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
364
365             seqno = key.mem[key.len-1];
366             
367             if (key.mem[0] == ch)
368             {
369                 zint woff;
370                 
371                 if (startSeq == -1)
372                     startSeq = seqno;
373                 woff = seqno - startSeq;
374                 if (woff >= 0 && woff < ws_length)
375                     ws[woff] = str;
376             }
377         }
378     }
379 }
380
381 #define FILE_MATCH_BLANK "\t "
382
383 static char *get_match_from_spec(ZebraHandle zh,
384                           zebra_rec_keys_t reckeys,
385                           const char *fname, const char *spec)
386 {
387     static char dstBuf[2048];      /* static here ??? */
388     char *dst = dstBuf;
389     const char *s = spec;
390
391     while (1)
392     {
393         for (; *s && strchr(FILE_MATCH_BLANK, *s); s++)
394             ;
395         if (!*s)
396             break;
397         if (*s == '(')
398         {
399             const char *ws[32];
400             char attset_str[64], attname_str[64];
401             int i;
402             int first = 1;
403             
404             for (s++; strchr(FILE_MATCH_BLANK, *s); s++)
405                 ;
406             for (i = 0; *s && *s != ',' && *s != ')' && 
407                      !strchr(FILE_MATCH_BLANK, *s); s++)
408                 if (i+1 < sizeof(attset_str))
409                     attset_str[i++] = *s;
410             attset_str[i] = '\0';
411             
412             for (; strchr(FILE_MATCH_BLANK, *s); s++)
413                 ;
414             if (*s != ',')
415                 strcpy(attname_str, attset_str);
416             else
417             {
418                 for (s++; strchr(FILE_MATCH_BLANK, *s); s++)
419                     ;
420                 for (i = 0; *s && *s != ')' && 
421                          !strchr(FILE_MATCH_BLANK, *s); s++)
422                     if (i+1 < sizeof(attname_str))
423                         attname_str[i++] = *s;
424                 attname_str[i] = '\0';
425             }
426
427             searchRecordKey (zh, reckeys, attname_str, ws, 32);
428
429             if (*s != ')')
430             {
431                 yaz_log (YLOG_WARN, "Missing ) in match criteria %s in group %s",
432                       spec, zh->m_group ? zh->m_group : "none");
433                 return NULL;
434             }
435             s++;
436
437             for (i = 0; i<32; i++)
438                 if (ws[i])
439                 {
440                     if (first)
441                     {
442                         *dst++ = ' ';
443                         first = 0;
444                     }
445                     strcpy (dst, ws[i]);
446                     dst += strlen(ws[i]);
447                 }
448             if (first)
449             {
450                 yaz_log (YLOG_WARN, "Record didn't contain match"
451                       " fields in (%s,%s)", attset_str, attname_str);
452                 return NULL;
453             }
454         }
455         else if (*s == '$')
456         {
457             int spec_len;
458             char special[64];
459             const char *spec_src = NULL;
460             const char *s1 = ++s;
461             while (*s1 && !strchr(FILE_MATCH_BLANK, *s1))
462                 s1++;
463
464             spec_len = s1 - s;
465             if (spec_len > sizeof(special)-1)
466                 spec_len = sizeof(special)-1;
467             memcpy (special, s, spec_len);
468             special[spec_len] = '\0';
469             s = s1;
470
471             if (!strcmp (special, "group"))
472                 spec_src = zh->m_group;
473             else if (!strcmp (special, "database"))
474                 spec_src = zh->basenames[0];
475             else if (!strcmp (special, "filename")) {
476                 spec_src = fname;
477             }
478             else if (!strcmp (special, "type"))
479                 spec_src = zh->m_record_type;
480             else 
481                 spec_src = NULL;
482             if (spec_src)
483             {
484                 strcpy (dst, spec_src);
485                 dst += strlen (spec_src);
486             }
487         }
488         else if (*s == '\"' || *s == '\'')
489         {
490             int stopMarker = *s++;
491             char tmpString[64];
492             int i = 0;
493
494             while (*s && *s != stopMarker)
495             {
496                 if (i+1 < sizeof(tmpString))
497                     tmpString[i++] = *s++;
498             }
499             if (*s)
500                 s++;
501             tmpString[i] = '\0';
502             strcpy (dst, tmpString);
503             dst += strlen (tmpString);
504         }
505         else
506         {
507             yaz_log (YLOG_WARN, "Syntax error in match criteria %s in group %s",
508                   spec, zh->m_group ? zh->m_group : "none");
509             return NULL;
510         }
511         *dst++ = 1;
512     }
513     if (dst == dstBuf)
514     {
515         yaz_log (YLOG_WARN, "No match criteria for record %s in group %s",
516               fname, zh->m_group ? zh->m_group : "none");
517         return NULL;
518     }
519     *dst = '\0';
520     return dstBuf;
521 }
522
/* Bookkeeping for log messages about a record. */
struct recordLogInfo
{
    const char *fname;           /* file name */
    int recordOffset;            /* record offset */
    struct recordGroup *rGroup;  /* record group */
};
528
529 static void all_matches_add(struct recExtractCtrl *ctrl)
530 {
531     RecWord word;
532     extract_init(ctrl, &word);
533     word.index_name = "_ALLRECORDS";
534     word.index_type = 'w';
535     word.seqno = 1;
536     extract_add_index_string (&word, zinfo_index_category_alwaysmatches,
537                               "", 0);
538 }
539
540 ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh, 
541                                        struct ZebraRecStream *stream,
542                                        enum zebra_recctrl_action_t action,
543                                        int test_mode, 
544                                        const char *recordType,
545                                        zint *sysno,
546                                        const char *match_criteria,
547                                        const char *fname,
548                                        RecType recType,
549                                        void *recTypeClientData);
550
551
552 ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, 
553                              int deleteFlag)
554 {
555     ZEBRA_RES r = ZEBRA_OK;
556     int i, fd;
557     char gprefix[128];
558     char ext[128];
559     char ext_res[128];
560     struct file_read_info *fi = 0;
561     const char *original_record_type = 0;
562     RecType recType;
563     void *recTypeClientData;
564     struct ZebraRecStream stream, *streamp;
565
566     zebra_init_log_level();
567
568     if (!zh->m_group || !*zh->m_group)
569         *gprefix = '\0';
570     else
571         sprintf (gprefix, "%s.", zh->m_group);
572     
573     yaz_log(log_level_extract, "zebra_extract_file %s", fname);
574
575     /* determine file extension */
576     *ext = '\0';
577     for (i = strlen(fname); --i >= 0; )
578         if (fname[i] == '/')
579             break;
580         else if (fname[i] == '.')
581         {
582             strcpy (ext, fname+i+1);
583             break;
584         }
585     /* determine file type - depending on extension */
586     original_record_type = zh->m_record_type;
587     if (!zh->m_record_type)
588     {
589         sprintf (ext_res, "%srecordType.%s", gprefix, ext);
590         zh->m_record_type = res_get (zh->res, ext_res);
591     }
592     if (!zh->m_record_type)
593     {
594         check_log_limit(zh);
595         if (zh->records_processed + zh->records_skipped
596             < zh->m_file_verbose_limit)
597             yaz_log (YLOG_LOG, "? %s", fname);
598         zh->records_skipped++;
599         return 0;
600     }
601     /* determine match criteria */
602     if (!zh->m_record_id)
603     {
604         sprintf (ext_res, "%srecordId.%s", gprefix, ext);
605         zh->m_record_id = res_get (zh->res, ext_res);
606     }
607
608     if (!(recType =
609           recType_byName (zh->reg->recTypes, zh->res, zh->m_record_type,
610                           &recTypeClientData)))
611     {
612         yaz_log(YLOG_WARN, "No such record type: %s", zh->m_record_type);
613         return ZEBRA_FAIL;
614     }
615
616     switch(recType->version)
617     {
618     case 0:
619         break;
620     default:
621         yaz_log(YLOG_WARN, "Bad filter version: %s", zh->m_record_type);
622     }
623     if (sysno && deleteFlag)
624     {
625         streamp = 0;
626         fi = 0;
627     }
628     else
629     {
630         char full_rep[1024];
631
632         if (zh->path_reg && !yaz_is_abspath (fname))
633         {
634             strcpy (full_rep, zh->path_reg);
635             strcat (full_rep, "/");
636             strcat (full_rep, fname);
637         }
638         else
639             strcpy (full_rep, fname);
640         
641         if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1)
642         {
643             yaz_log (YLOG_WARN|YLOG_ERRNO, "open %s", full_rep);
644             zh->m_record_type = original_record_type;
645             return ZEBRA_FAIL;
646         }
647         streamp = &stream;
648         zebra_create_stream_fd(streamp, fd, 0);
649     }
650     r = zebra_extract_records_stream(zh, streamp,
651                                      deleteFlag ? 
652                                      action_delete : action_update,
653                                      0, /* tst_mode */
654                                      zh->m_record_type,
655                                      sysno,
656                                      0, /*match_criteria */
657                                      fname,
658                                      recType, recTypeClientData);
659     if (streamp)
660         stream.destroy(streamp);
661     zh->m_record_type = original_record_type;
662     return r;
663 }
664
665 /*
666   If sysno is provided, then it's used to identify the reocord.
667   If not, and match_criteria is provided, then sysno is guessed
668   If not, and a record is provided, then sysno is got from there
669   
670  */
671
672 ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, 
673                                       const char *buf, size_t buf_size,
674                                       enum zebra_recctrl_action_t action,
675                                       int test_mode, 
676                                       const char *recordType,
677                                       zint *sysno,
678                                       const char *match_criteria,
679                                       const char *fname)
680 {
681     struct ZebraRecStream stream;
682     ZEBRA_RES res;
683     void *clientData;
684     RecType recType = 0;
685
686     if (recordType && *recordType)
687     {
688         yaz_log(log_level_extract,
689                 "Record type explicitly specified: %s", recordType);
690         recType = recType_byName (zh->reg->recTypes, zh->res, recordType,
691                                   &clientData);
692     } 
693     else
694     {
695         if (!(zh->m_record_type))
696         {
697             yaz_log (YLOG_WARN, "No such record type defined");
698             return ZEBRA_FAIL;
699         }
700         yaz_log(log_level_extract, "Get record type from rgroup: %s",
701                 zh->m_record_type);
702         recType = recType_byName (zh->reg->recTypes, zh->res,
703                                   zh->m_record_type, &clientData);
704         recordType = zh->m_record_type;
705     }
706     
707     if (!recType)
708     {
709         yaz_log (YLOG_WARN, "No such record type: %s", recordType);
710         return ZEBRA_FAIL;
711     }
712
713     zebra_create_stream_mem(&stream, buf, buf_size);
714
715     res = zebra_extract_records_stream(zh, &stream,
716                                        action,
717                                        test_mode, 
718                                        recordType,
719                                        sysno,
720                                        match_criteria,
721                                        fname,
722                                        recType, clientData);
723     stream.destroy(&stream);
724     return res;
725 }
726
727 ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh, 
728                                        struct ZebraRecStream *stream,
729                                        enum zebra_recctrl_action_t action,
730                                        int test_mode, 
731                                        const char *recordType,
732                                        zint *sysno,
733                                        const char *match_criteria,
734                                        const char *fname,
735                                        RecType recType,
736                                        void *recTypeClientData)
737 {
738     ZEBRA_RES res = ZEBRA_OK;
739     while (1)
740     {
741         int more = 0;
742         res = zebra_extract_record_stream(zh, stream,
743                                           action,
744                                           test_mode, 
745                                           recordType,
746                                           sysno,
747                                           match_criteria,
748                                           fname,
749                                           recType, recTypeClientData, &more);
750         if (!more)
751         {
752             res = ZEBRA_OK;
753             break;
754         }
755         if (res != ZEBRA_OK)
756             break;
757         if (sysno)
758             break;
759     }
760     return res;
761 }
762
763
764 static WRBUF wrbuf_hex_str(const char *cstr)
765 {
766     size_t i;
767     WRBUF w = wrbuf_alloc();
768     for (i = 0; cstr[i]; i++)
769     {
770         if (cstr[i] < ' ' || cstr[i] > 126)
771             wrbuf_printf(w, "\\%02X", cstr[i] & 0xff);
772         else
773             wrbuf_putc(w, cstr[i]);
774     }
775     return w;
776 }
777
778 ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, 
779                                       struct ZebraRecStream *stream,
780                                       enum zebra_recctrl_action_t action,
781                                       int test_mode, 
782                                       const char *recordType,
783                                       zint *sysno,
784                                       const char *match_criteria,
785                                       const char *fname,
786                                       RecType recType,
787                                       void *recTypeClientData,
788                                       int *more)
789
790 {
791     zint sysno0 = 0;
792     RecordAttr *recordAttr;
793     struct recExtractCtrl extractCtrl;
794     int r;
795     const char *matchStr = 0;
796     Record rec;
797     off_t start_offset = 0, end_offset = 0;
798     const char *pr_fname = fname;  /* filename to print .. */
799     int show_progress = zh->records_processed + zh->records_skipped 
800         < zh->m_file_verbose_limit ? 1:0;
801
802     zebra_init_log_level();
803
804     if (!pr_fname)
805         pr_fname = "<no file>";  /* make it printable if file is omitted */
806
807     zebra_rec_keys_reset(zh->reg->keys);
808     zebra_rec_keys_reset(zh->reg->sortKeys);
809
810     if (zebraExplain_curDatabase (zh->reg->zei, zh->basenames[0]))
811     {
812         if (zebraExplain_newDatabase (zh->reg->zei, zh->basenames[0], 
813                                       zh->m_explain_database))
814             return ZEBRA_FAIL;
815     }
816
817     if (stream)
818     {
819         off_t null_offset = 0;
820         extractCtrl.stream = stream;
821
822         start_offset = stream->tellf(stream);
823
824         extractCtrl.first_record = start_offset ? 0 : 1;
825         
826         stream->endf(stream, &null_offset);;
827
828         extractCtrl.init = extract_init;
829         extractCtrl.tokenAdd = extract_token_add;
830         extractCtrl.schemaAdd = extract_schema_add;
831         extractCtrl.dh = zh->reg->dh;
832         extractCtrl.handle = zh;
833         extractCtrl.match_criteria[0] = '\0';
834         extractCtrl.staticrank = 0;
835         extractCtrl.action = action;
836
837         init_extractCtrl(zh, &extractCtrl);
838
839         extract_set_store_data_prepare(&extractCtrl);
840         
841         r = (*recType->extract)(recTypeClientData, &extractCtrl);
842
843         if (action == action_update)
844         {
845             action = extractCtrl.action;
846         }
847         
848         switch (r)
849         {
850         case RECCTRL_EXTRACT_EOF:
851             return ZEBRA_FAIL;
852         case RECCTRL_EXTRACT_ERROR_GENERIC:
853             /* error occured during extraction ... */
854             yaz_log (YLOG_WARN, "extract error: generic");
855             return ZEBRA_FAIL;
856         case RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER:
857             /* error occured during extraction ... */
858             yaz_log (YLOG_WARN, "extract error: no such filter");
859             return ZEBRA_FAIL;
860         case RECCTRL_EXTRACT_SKIP:
861             if (show_progress)
862                 yaz_log (YLOG_LOG, "skip %s %s " ZINT_FORMAT,
863                          recordType, pr_fname, (zint) start_offset);
864             *more = 1;
865             
866             end_offset = stream->endf(stream, 0);
867             if (end_offset)
868                 stream->seekf(stream, end_offset);
869
870             return ZEBRA_OK;
871         case RECCTRL_EXTRACT_OK:
872             break;
873         default:
874             yaz_log (YLOG_WARN, "extract error: unknown error: %d", r);
875             return ZEBRA_FAIL;
876         }
877         end_offset = stream->endf(stream, 0);
878         if (end_offset)
879             stream->seekf(stream, end_offset);
880         else
881             end_offset = stream->tellf(stream);
882
883         all_matches_add(&extractCtrl);
884         
885         if (extractCtrl.match_criteria[0])
886             match_criteria = extractCtrl.match_criteria;
887     }
888
889     *more = 1;
890     if (!sysno)
891     {
892         sysno = &sysno0;
893
894         if (match_criteria && *match_criteria) {
895             matchStr = match_criteria;
896         } else {
897             if (zh->m_record_id && *zh->m_record_id) {
898                 matchStr = get_match_from_spec(zh, zh->reg->keys, pr_fname, 
899                                                zh->m_record_id);
900                 if (!matchStr)
901                 {
902                     yaz_log (YLOG_LOG, "error %s %s " ZINT_FORMAT, recordType,
903                              pr_fname, (zint) start_offset);
904                     return ZEBRA_FAIL;
905                 }
906             }
907         }
908         if (matchStr) 
909         {
910             int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
911             char *rinfo = dict_lookup_ord(zh->reg->matchDict, db_ord,
912                                           matchStr);
913
914             
915             if (log_level_extract)
916             {
917                 WRBUF w = wrbuf_hex_str(matchStr);
918                 yaz_log(log_level_extract, "matchStr: %s", wrbuf_cstr(w));
919                 wrbuf_destroy(w);
920             }
921             if (rinfo)
922             {
923                 assert(*rinfo == sizeof(*sysno));
924                 memcpy (sysno, rinfo+1, sizeof(*sysno));
925             }
926        }
927     }
928     if (zebra_rec_keys_empty(zh->reg->keys))
929     {
930         /* the extraction process returned no information - the record
931            is probably empty - unless flagShowRecords is in use */
932         if (test_mode)
933             return ZEBRA_OK;
934     }
935
936     if (! *sysno)
937     {
938         /* new record */
939         if (action == action_delete)
940         {
941             yaz_log (YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
942                          pr_fname, (zint) start_offset);
943             yaz_log (YLOG_WARN, "cannot delete record above (seems new)");
944             return ZEBRA_FAIL;
945         }
946         else if (action == action_replace)
947         {
948             yaz_log (YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
949                          pr_fname, (zint) start_offset);
950             yaz_log (YLOG_WARN, "cannot update record above (seems new)");
951             return ZEBRA_FAIL;
952         }
953         if (show_progress)
954             yaz_log (YLOG_LOG, "add %s %s " ZINT_FORMAT, recordType, pr_fname,
955                      (zint) start_offset);
956         rec = rec_new (zh->reg->records);
957
958         *sysno = rec->sysno;
959
960         recordAttr = rec_init_attr (zh->reg->zei, rec);
961         if (extractCtrl.staticrank < 0)
962         {
963             yaz_log(YLOG_WARN, "Negative staticrank for record. Set to 0");
964             extractCtrl.staticrank = 0;
965         }
966
967         if (matchStr)
968         {
969             int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
970             dict_insert_ord(zh->reg->matchDict, db_ord, matchStr,
971                             sizeof(*sysno), sysno);
972         }
973
974         extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys);
975 #if FLUSH2
976         extract_flush_record_keys2(zh, *sysno,
977                                    zh->reg->keys, extractCtrl.staticrank,
978                                    0, recordAttr->staticrank);
979 #else
980         extract_flush_record_keys(zh, *sysno, 1, zh->reg->keys,
981                                   extractCtrl.staticrank);
982 #endif
983         recordAttr->staticrank = extractCtrl.staticrank;
984         zh->records_inserted++;
985     } 
986     else
987     {
988         /* record already exists */
989         zebra_rec_keys_t delkeys = zebra_rec_keys_open();
990         zebra_rec_keys_t sortKeys = zebra_rec_keys_open();
991         if (action == action_insert)
992         {
993             yaz_log (YLOG_LOG, "skipped %s %s " ZINT_FORMAT, 
994                          recordType, pr_fname, (zint) start_offset);
995             logRecord(zh);
996             return ZEBRA_FAIL;
997         }
998
999         rec = rec_get (zh->reg->records, *sysno);
1000         assert (rec);
1001         
1002         recordAttr = rec_init_attr (zh->reg->zei, rec);
1003
1004         /* decrease total size */
1005         zebraExplain_recordBytesIncrement (zh->reg->zei,
1006                                            - recordAttr->recordSize);
1007
1008         zebra_rec_keys_set_buf(delkeys,
1009                                rec->info[recInfo_delKeys],
1010                                rec->size[recInfo_delKeys],
1011                                0);
1012         zebra_rec_keys_set_buf(sortKeys,
1013                                rec->info[recInfo_sortKeys],
1014                                rec->size[recInfo_sortKeys],
1015                                0);
1016
1017         extract_flush_sort_keys(zh, *sysno, 0, sortKeys);
1018 #if !FLUSH2
1019         extract_flush_record_keys(zh, *sysno, 0, delkeys,
1020                                   recordAttr->staticrank);
1021 #endif
1022         if (action == action_delete)
1023         {
1024             /* record going to be deleted */
1025 #if FLUSH2
1026             extract_flush_record_keys2(zh, *sysno, 0, recordAttr->staticrank,
1027                                        delkeys, recordAttr->staticrank);
1028 #endif       
1029             if (zebra_rec_keys_empty(delkeys))
1030             {
1031                 yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
1032                         pr_fname, (zint) start_offset);
1033                 yaz_log(YLOG_WARN, "cannot delete file above, "
1034                         "storeKeys false (3)");
1035             }
1036             else
1037             {
1038                 if (show_progress)
1039                     yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType,
1040                             pr_fname, (zint) start_offset);
1041                 zh->records_deleted++;
1042                 if (matchStr)
1043                 {
1044                     int db_ord = zebraExplain_get_database_ord(zh->reg->zei);
1045                     dict_delete_ord(zh->reg->matchDict, db_ord, matchStr);
1046                 }
1047                 rec_del (zh->reg->records, &rec);
1048             }
1049             zebra_rec_keys_close(delkeys);
1050             zebra_rec_keys_close(sortKeys);
1051             rec_free(&rec);
1052             logRecord(zh);
1053             return ZEBRA_OK;
1054         }
1055         else
1056         {   /* update or special_update */
1057             if (show_progress)
1058                 yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType,
1059                         pr_fname, (zint) start_offset);
1060             extract_flush_sort_keys(zh, *sysno, 1, zh->reg->sortKeys);
1061
1062 #if FLUSH2
1063             extract_flush_record_keys2(zh, *sysno,
1064                                        zh->reg->keys, extractCtrl.staticrank,
1065                                        delkeys, recordAttr->staticrank);
1066 #else
1067             extract_flush_record_keys(zh, *sysno, 1, 
1068                                       zh->reg->keys, extractCtrl.staticrank);
1069 #endif
1070             recordAttr->staticrank = extractCtrl.staticrank;
1071             zh->records_updated++;
1072         }
1073         zebra_rec_keys_close(delkeys);
1074         zebra_rec_keys_close(sortKeys);
1075     }
1076     /* update file type */
1077     xfree (rec->info[recInfo_fileType]);
1078     rec->info[recInfo_fileType] =
1079         rec_strdup (recordType, &rec->size[recInfo_fileType]);
1080
1081     /* update filename */
1082     xfree (rec->info[recInfo_filename]);
1083     rec->info[recInfo_filename] =
1084         rec_strdup (fname, &rec->size[recInfo_filename]);
1085
1086     /* update delete keys */
1087     xfree (rec->info[recInfo_delKeys]);
1088     if (!zebra_rec_keys_empty(zh->reg->keys) && zh->m_store_keys == 1)
1089     {
1090         zebra_rec_keys_get_buf(zh->reg->keys,
1091                                &rec->info[recInfo_delKeys],
1092                                &rec->size[recInfo_delKeys]);
1093     }
1094     else
1095     {
1096         rec->info[recInfo_delKeys] = NULL;
1097         rec->size[recInfo_delKeys] = 0;
1098     }
1099     /* update sort keys */
1100     xfree (rec->info[recInfo_sortKeys]);
1101
1102     zebra_rec_keys_get_buf(zh->reg->sortKeys,
1103                            &rec->info[recInfo_sortKeys],
1104                            &rec->size[recInfo_sortKeys]);
1105
1106     if (stream)
1107     {
1108         recordAttr->recordSize = end_offset - start_offset;
1109         zebraExplain_recordBytesIncrement(zh->reg->zei,
1110                                           recordAttr->recordSize);
1111     }
1112
1113     /* set run-number for this record */
1114     recordAttr->runNumber =
1115         zebraExplain_runNumberIncrement (zh->reg->zei, 0);
1116
1117     /* update store data */
1118     xfree (rec->info[recInfo_storeData]);
1119
1120     /* update store data */
1121     if (zh->store_data_buf)
1122     {
1123         rec->size[recInfo_storeData] = zh->store_data_size;
1124         rec->info[recInfo_storeData] = zh->store_data_buf;
1125         zh->store_data_buf = 0;
1126         recordAttr->recordSize = zh->store_data_size;
1127     }
1128     else if (zh->m_store_data)
1129     {
1130         off_t cur_offset = stream->tellf(stream);
1131
1132         rec->size[recInfo_storeData] = recordAttr->recordSize;
1133         rec->info[recInfo_storeData] = (char *)
1134             xmalloc (recordAttr->recordSize);
1135         stream->seekf(stream, start_offset);
1136         stream->readf(stream, rec->info[recInfo_storeData],
1137                       recordAttr->recordSize);
1138         stream->seekf(stream, cur_offset);
1139     }
1140     else
1141     {
1142         rec->info[recInfo_storeData] = NULL;
1143         rec->size[recInfo_storeData] = 0;
1144     }
1145     /* update database name */
1146     xfree (rec->info[recInfo_databaseName]);
1147     rec->info[recInfo_databaseName] =
1148         rec_strdup (zh->basenames[0], &rec->size[recInfo_databaseName]); 
1149
1150     /* update offset */
1151     recordAttr->recordOffset = start_offset;
1152     
1153     /* commit this record */
1154     rec_put (zh->reg->records, &rec);
1155     logRecord(zh);
1156     return ZEBRA_OK;
1157 }
1158
1159 ZEBRA_RES zebra_extract_explain(void *handle, Record rec, data1_node *n)
1160 {
1161     ZebraHandle zh = (ZebraHandle) handle;
1162     struct recExtractCtrl extractCtrl;
1163
1164     if (zebraExplain_curDatabase (zh->reg->zei,
1165                                   rec->info[recInfo_databaseName]))
1166     {
1167         abort();
1168         if (zebraExplain_newDatabase (zh->reg->zei,
1169                                       rec->info[recInfo_databaseName], 0))
1170             abort ();
1171     }
1172
1173     zebra_rec_keys_reset(zh->reg->keys);
1174     zebra_rec_keys_reset(zh->reg->sortKeys);
1175
1176     extractCtrl.init = extract_init;
1177     extractCtrl.tokenAdd = extract_token_add;
1178     extractCtrl.schemaAdd = extract_schema_add;
1179     extractCtrl.dh = zh->reg->dh;
1180
1181     init_extractCtrl(zh, &extractCtrl);
1182
1183     extractCtrl.flagShowRecords = 0;
1184     extractCtrl.match_criteria[0] = '\0';
1185     extractCtrl.staticrank = 0;
1186     extractCtrl.action = action_update;
1187
1188     extractCtrl.handle = handle;
1189     extractCtrl.first_record = 1;
1190     
1191     extract_set_store_data_prepare(&extractCtrl);
1192
1193     if (n)
1194         grs_extract_tree(&extractCtrl, n);
1195
1196     if (rec->size[recInfo_delKeys])
1197     {
1198         zebra_rec_keys_t delkeys = zebra_rec_keys_open();
1199         
1200         zebra_rec_keys_t sortkeys = zebra_rec_keys_open();
1201
1202         zebra_rec_keys_set_buf(delkeys, rec->info[recInfo_delKeys],
1203                                rec->size[recInfo_delKeys],
1204                                0);
1205 #if FLUSH2
1206         extract_flush_record_keys2(zh, rec->sysno, 
1207                                    zh->reg->keys, 0, delkeys, 0);
1208 #else
1209         extract_flush_record_keys(zh, rec->sysno, 0, delkeys, 0);
1210         extract_flush_record_keys(zh, rec->sysno, 1, zh->reg->keys, 0);
1211 #endif
1212         zebra_rec_keys_close(delkeys);
1213
1214         zebra_rec_keys_set_buf(sortkeys, rec->info[recInfo_sortKeys],
1215                                rec->size[recInfo_sortKeys],
1216                                0);
1217
1218         extract_flush_sort_keys(zh, rec->sysno, 0, sortkeys);
1219         zebra_rec_keys_close(sortkeys);
1220     }
1221     else
1222     {
1223 #if FLUSH2
1224         extract_flush_record_keys2(zh, rec->sysno, zh->reg->keys, 0, 0, 0);
1225 #else
1226         extract_flush_record_keys(zh, rec->sysno, 1, zh->reg->keys, 0);        
1227 #endif
1228     }
1229     extract_flush_sort_keys(zh, rec->sysno, 1, zh->reg->sortKeys);
1230     
1231     xfree (rec->info[recInfo_delKeys]);
1232     zebra_rec_keys_get_buf(zh->reg->keys,
1233                            &rec->info[recInfo_delKeys], 
1234                            &rec->size[recInfo_delKeys]);
1235
1236     xfree (rec->info[recInfo_sortKeys]);
1237     zebra_rec_keys_get_buf(zh->reg->sortKeys,
1238                            &rec->info[recInfo_sortKeys],
1239                            &rec->size[recInfo_sortKeys]);
1240     return ZEBRA_OK;
1241 }
1242
1243 void extract_rec_keys_log(ZebraHandle zh, int is_insert,
1244                           zebra_rec_keys_t reckeys,
1245                           int level)
1246 {
1247     if (zebra_rec_keys_rewind(reckeys))
1248     {
1249         size_t slen;
1250         const char *str;
1251         struct it_key key;
1252         NMEM nmem = nmem_create();
1253
1254         while(zebra_rec_keys_read(reckeys, &str, &slen, &key))
1255         {
1256             char keystr[200]; /* room for zints to print */
1257             char *dst_term = 0;
1258             int ord = CAST_ZINT_TO_INT(key.mem[0]);
1259             int index_type, i;
1260             const char *string_index;
1261             
1262             zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
1263                                     0/* db */, &string_index);
1264             assert(index_type);
1265             zebra_term_untrans_iconv(zh, nmem, index_type,
1266                                      &dst_term, str);
1267             *keystr = '\0';
1268             for (i = 0; i<key.len; i++)
1269             {
1270                 sprintf(keystr + strlen(keystr), ZINT_FORMAT " ", key.mem[i]);
1271             }
1272
1273             if (*str < CHR_BASE_CHAR)
1274             {
1275                 int i;
1276                 char dst_buf[200]; /* room for special chars */
1277
1278                 strcpy(dst_buf , "?");
1279
1280                 if (!strcmp(str, ""))
1281                     strcpy(dst_buf, "alwaysmatches");
1282                 if (!strcmp(str, FIRST_IN_FIELD_STR))
1283                     strcpy(dst_buf, "firstinfield");
1284                 else if (!strcmp(str, CHR_UNKNOWN))
1285                     strcpy(dst_buf, "unknown");
1286                 else if (!strcmp(str, CHR_SPACE))
1287                     strcpy(dst_buf, "space");
1288                 
1289                 for (i = 0; i<slen; i++)
1290                 {
1291                     sprintf(dst_buf + strlen(dst_buf), " %d", str[i] & 0xff);
1292                 }
1293                 yaz_log(level, "%s%c %s %s", keystr, index_type,
1294                         string_index, dst_buf);
1295                 
1296             }
1297             else
1298                 yaz_log(level, "%s%c %s \"%s\"", keystr, index_type,
1299                         string_index, dst_term);
1300
1301             nmem_reset(nmem);
1302         }
1303         nmem_destroy(nmem);
1304     }
1305 }
1306
1307 void extract_rec_keys_adjust(ZebraHandle zh, int is_insert,
1308                              zebra_rec_keys_t reckeys)
1309 {
1310     ZebraExplainInfo zei = zh->reg->zei;
1311     struct ord_stat {
1312         int no;
1313         int ord;
1314         struct ord_stat *next;
1315     };
1316
1317     if (zebra_rec_keys_rewind(reckeys))
1318     {
1319         struct ord_stat *ord_list = 0;
1320         struct ord_stat *p;
1321         size_t slen;
1322         const char *str;
1323         struct it_key key_in;
1324         while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
1325         {
1326             int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
1327
1328             for (p = ord_list; p ; p = p->next)
1329                 if (p->ord == ord)
1330                 {
1331                     p->no++;
1332                     break;
1333                 }
1334             if (!p)
1335             {
1336                 p = xmalloc(sizeof(*p));
1337                 p->no = 1;
1338                 p->ord = ord;
1339                 p->next = ord_list;
1340                 ord_list = p;
1341             }
1342         }
1343
1344         p = ord_list;
1345         while (p)
1346         {
1347             struct ord_stat *p1 = p;
1348
1349             if (is_insert)
1350                 zebraExplain_ord_adjust_occurrences(zei, p->ord, p->no, 1);
1351             else
1352                 zebraExplain_ord_adjust_occurrences(zei, p->ord, - p->no, -1);
1353             p = p->next;
1354             xfree(p1);
1355         }
1356     }
1357 }
1358
1359 void extract_flush_record_keys2(ZebraHandle zh, zint sysno,
1360                                 zebra_rec_keys_t ins_keys, zint ins_rank,
1361                                 zebra_rec_keys_t del_keys, zint del_rank)
1362 {
1363     ZebraExplainInfo zei = zh->reg->zei;
1364     int normal = 0;
1365     int optimized = 0;
1366
1367     if (!zh->reg->key_block)
1368     {
1369         int mem = 1024*1024 * atoi( res_get_def( zh->res, "memmax", "8"));
1370         const char *key_tmp_dir = res_get_def (zh->res, "keyTmpDir", ".");
1371         int use_threads = atoi(res_get_def (zh->res, "threads", "1"));
1372         zh->reg->key_block = key_block_create(mem, key_tmp_dir, use_threads);
1373     }
1374
1375     if (ins_keys)
1376     {
1377         extract_rec_keys_adjust(zh, 1, ins_keys);
1378         if (!del_keys)
1379             zebraExplain_recordCountIncrement (zei, 1);
1380         zebra_rec_keys_rewind(ins_keys);
1381     }
1382     if (del_keys)
1383     {
1384         extract_rec_keys_adjust(zh, 0, del_keys);
1385         if (!ins_keys)
1386             zebraExplain_recordCountIncrement (zei, -1);
1387         zebra_rec_keys_rewind(del_keys);
1388     }
1389
1390     while (1)
1391     {
1392         size_t del_slen;
1393         const char *del_str;
1394         struct it_key del_key_in;
1395         int del = 0;
1396
1397         size_t ins_slen;
1398         const char *ins_str;
1399         struct it_key ins_key_in;
1400         int ins = 0;
1401
1402         if (del_keys)
1403             del = zebra_rec_keys_read(del_keys, &del_str, &del_slen,
1404                                       &del_key_in);
1405         if (ins_keys)
1406             ins = zebra_rec_keys_read(ins_keys, &ins_str, &ins_slen,
1407                                       &ins_key_in);
1408
1409         if (del && ins && ins_rank == del_rank
1410             && !key_compare(&del_key_in, &ins_key_in) 
1411             && ins_slen == del_slen && !memcmp(del_str, ins_str, del_slen))
1412         {
1413             optimized++;
1414             continue;
1415         }
1416         if (!del && !ins)
1417             break;
1418         
1419         normal++;
1420         if (del)
1421             key_block_write(zh->reg->key_block, sysno, 
1422                             &del_key_in, 0, del_str, del_slen,
1423                             del_rank, zh->m_staticrank);
1424         if (ins)
1425             key_block_write(zh->reg->key_block, sysno, 
1426                             &ins_key_in, 1, ins_str, ins_slen,
1427                             ins_rank, zh->m_staticrank);
1428     }
1429     yaz_log(log_level_extract, "normal=%d optimized=%d", normal, optimized);
1430 }
1431
1432 void extract_flush_record_keys(ZebraHandle zh, zint sysno, int cmd,
1433                                zebra_rec_keys_t reckeys,
1434                                zint staticrank)
1435 {
1436     ZebraExplainInfo zei = zh->reg->zei;
1437
1438     extract_rec_keys_adjust(zh, cmd, reckeys);
1439
1440     if (log_level_details)
1441     {
1442         yaz_log(log_level_details, "Keys for record " ZINT_FORMAT " %s",
1443                 sysno, cmd ? "insert" : "delete");
1444         extract_rec_keys_log(zh, cmd, reckeys, log_level_details);
1445     }
1446
1447     if (!zh->reg->key_block)
1448     {
1449         int mem = 1024*1024 * atoi( res_get_def( zh->res, "memmax", "8"));
1450         const char *key_tmp_dir = res_get_def (zh->res, "keyTmpDir", ".");
1451         int use_threads = atoi(res_get_def (zh->res, "threads", "1"));
1452         zh->reg->key_block = key_block_create(mem, key_tmp_dir, use_threads);
1453     }
1454     zebraExplain_recordCountIncrement (zei, cmd ? 1 : -1);
1455
1456 #if 0
1457     yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " cmd=%d", sysno, cmd);
1458     print_rec_keys(zh, reckeys);
1459 #endif
1460     if (zebra_rec_keys_rewind(reckeys))
1461     {
1462         size_t slen;
1463         const char *str;
1464         struct it_key key_in;
1465         while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
1466         {
1467             key_block_write(zh->reg->key_block, sysno, 
1468                             &key_in, cmd, str, slen,
1469                             staticrank, zh->m_staticrank);
1470         }
1471     }
1472 }
1473
1474 ZEBRA_RES zebra_rec_keys_to_snippets(ZebraHandle zh,
1475                                      zebra_rec_keys_t reckeys,
1476                                      zebra_snippets *snippets)
1477 {
1478     NMEM nmem = nmem_create();
1479     if (zebra_rec_keys_rewind(reckeys)) 
1480     {
1481         const char *str;
1482         size_t slen;
1483         struct it_key key;
1484         while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
1485         {
1486             char *dst_term = 0;
1487             int ord;
1488             zint seqno;
1489             int index_type;
1490
1491             assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
1492             seqno = key.mem[key.len-1];
1493             ord = CAST_ZINT_TO_INT(key.mem[0]);
1494             
1495             zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type,
1496                                     0/* db */, 0 /* string_index */);
1497             assert(index_type);
1498             zebra_term_untrans_iconv(zh, nmem, index_type,
1499                                      &dst_term, str);
1500             zebra_snippets_append(snippets, seqno, 0, ord, dst_term);
1501             nmem_reset(nmem);
1502         }
1503     }
1504     nmem_destroy(nmem);
1505     return ZEBRA_OK;
1506 }
1507
1508 void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys)
1509 {
1510     yaz_log(YLOG_LOG, "print_rec_keys");
1511     if (zebra_rec_keys_rewind(reckeys))
1512     {
1513         const char *str;
1514         size_t slen;
1515         struct it_key key;
1516         while (zebra_rec_keys_read(reckeys, &str, &slen, &key))
1517         {
1518             char dst_buf[IT_MAX_WORD];
1519             zint seqno;
1520             int index_type;
1521             int ord = CAST_ZINT_TO_INT(key.mem[0]);
1522             const char *db = 0;
1523             assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2);
1524
1525             zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db, 0);
1526             
1527             seqno = key.mem[key.len-1];
1528             
1529             zebra_term_untrans(zh, index_type, dst_buf, str);
1530             
1531             yaz_log(YLOG_LOG, "ord=%d seqno=" ZINT_FORMAT 
1532                     " term=%s", ord, seqno, dst_buf); 
1533         }
1534     }
1535 }
1536
1537 static void extract_add_index_string(RecWord *p, zinfo_index_category_t cat,
1538                                      const char *str, int length)
1539 {
1540     struct it_key key;
1541     ZebraHandle zh = p->extractCtrl->handle;
1542     ZebraExplainInfo zei = zh->reg->zei;
1543     int ch, i;
1544
1545     ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name);
1546     if (ch < 0)
1547         ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name);
1548
1549     i = 0;
1550     key.mem[i++] = ch;
1551     key.mem[i++] = p->record_id;
1552     key.mem[i++] = p->section_id;
1553
1554     if (zh->m_segment_indexing)
1555         key.mem[i++] = p->segment;
1556     key.mem[i++] = p->seqno;
1557     key.len = i;
1558
1559     zebra_rec_keys_write(zh->reg->keys, str, length, &key);
1560 }
1561
1562 static void extract_add_sort_string(RecWord *p, const char *str, int length)
1563 {
1564     struct it_key key;
1565     ZebraHandle zh = p->extractCtrl->handle;
1566     ZebraExplainInfo zei = zh->reg->zei;
1567     int ch;
1568     zinfo_index_category_t cat = zinfo_index_category_sort;
1569
1570     ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name);
1571     if (ch < 0)
1572         ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name);
1573     key.len = 2;
1574     key.mem[0] = ch;
1575     key.mem[1] = p->record_id;
1576
1577     zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key);
1578 }
1579
1580 static void extract_add_staticrank_string(RecWord *p,
1581                                           const char *str, int length)
1582 {
1583     char valz[40];
1584     struct recExtractCtrl *ctrl = p->extractCtrl;
1585
1586     if (length > sizeof(valz)-1)
1587         length = sizeof(valz)-1;
1588
1589     memcpy(valz, str, length);
1590     valz[length] = '\0';
1591     ctrl->staticrank = atozint(valz);
1592 }
1593
1594 static void extract_add_string(RecWord *p, const char *string, int length)
1595 {
1596     ZebraHandle zh = p->extractCtrl->handle;
1597     assert (length > 0);
1598
1599     if (!p->index_name)
1600         return;
1601
1602     if (zebra_maps_is_index(zh->reg->zebra_maps, p->index_type))
1603     {
1604         extract_add_index_string(p, zinfo_index_category_index,
1605                                  string, length);
1606         if (zebra_maps_is_alwaysmatches(zh->reg->zebra_maps, p->index_type))
1607         {
1608             RecWord word;
1609             memcpy(&word, p, sizeof(word));
1610
1611             word.seqno = 1;
1612             extract_add_index_string(
1613                 &word, zinfo_index_category_alwaysmatches, "", 0);
1614         }
1615     }
1616     else if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type))
1617     {
1618         extract_add_sort_string(p, string, length);
1619     }
1620     else if (zebra_maps_is_staticrank(zh->reg->zebra_maps, p->index_type))
1621     {
1622         extract_add_staticrank_string(p, string, length);
1623     }
1624 }
1625
1626 static void extract_add_incomplete_field(RecWord *p)
1627 {
1628     ZebraHandle zh = p->extractCtrl->handle;
1629     const char *b = p->term_buf;
1630     int remain = p->term_len;
1631     int first = 1;
1632     const char **map = 0;
1633     
1634     if (remain > 0)
1635         map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0);
1636
1637     while (map)
1638     {
1639         char buf[IT_MAX_WORD+1];
1640         int i, remain;
1641
1642         /* Skip spaces */
1643         while (map && *map && **map == *CHR_SPACE)
1644         {
1645             remain = p->term_len - (b - p->term_buf);
1646             if (remain > 0)
1647                 map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b,
1648                                        remain, 0);
1649             else
1650                 map = 0;
1651         }
1652         if (!map)
1653             break;
1654         i = 0;
1655         while (map && *map && **map != *CHR_SPACE)
1656         {
1657             const char *cp = *map;
1658
1659             while (i < IT_MAX_WORD && *cp)
1660                 buf[i++] = *(cp++);
1661             remain = p->term_len - (b - p->term_buf);
1662             if (remain > 0)
1663                 map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0);
1664             else
1665                 map = 0;
1666         }
1667         if (!i)
1668             return;
1669
1670         if (first)
1671         {   
1672             first = 0;
1673             if (zebra_maps_is_first_in_field(zh->reg->zebra_maps, p->index_type))
1674             {
1675                 /* first in field marker */
1676                 extract_add_string(p, FIRST_IN_FIELD_STR, FIRST_IN_FIELD_LEN);
1677                 p->seqno++;
1678             }
1679         }
1680         extract_add_string (p, buf, i);
1681         p->seqno++;
1682     }
1683 }
1684
1685 static void extract_add_complete_field (RecWord *p)
1686 {
1687     ZebraHandle zh = p->extractCtrl->handle;
1688     const char *b = p->term_buf;
1689     char buf[IT_MAX_WORD+1];
1690     const char **map = 0;
1691     int i = 0, remain = p->term_len;
1692
1693     if (remain > 0)
1694         map = zebra_maps_input (zh->reg->zebra_maps, p->index_type, &b, remain, 1);
1695
1696     while (remain > 0 && i < IT_MAX_WORD)
1697     {
1698         while (map && *map && **map == *CHR_SPACE)
1699         {
1700             remain = p->term_len - (b - p->term_buf);
1701
1702             if (remain > 0)
1703             {
1704                 int first = i ? 0 : 1;  /* first position */
1705                 map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, first);
1706             }
1707             else
1708                 map = 0;
1709         }
1710         if (!map)
1711             break;
1712
1713         if (i && i < IT_MAX_WORD)
1714             buf[i++] = *CHR_SPACE;
1715         while (map && *map && **map != *CHR_SPACE)
1716         {
1717             const char *cp = *map;
1718
1719             if (**map == *CHR_CUT)
1720             {
1721                 i = 0;
1722             }
1723             else
1724             {
1725                 if (i >= IT_MAX_WORD)
1726                     break;
1727                 while (i < IT_MAX_WORD && *cp)
1728                     buf[i++] = *(cp++);
1729             }
1730             remain = p->term_len  - (b - p->term_buf);
1731             if (remain > 0)
1732             {
1733                 map = zebra_maps_input (zh->reg->zebra_maps, p->index_type, &b,
1734                                         remain, 0);
1735             }
1736             else
1737                 map = 0;
1738         }
1739     }
1740     if (!i)
1741         return;
1742     extract_add_string (p, buf, i);
1743 }
1744
1745 static void extract_token_add(RecWord *p)
1746 {
1747     ZebraHandle zh = p->extractCtrl->handle;
1748     WRBUF wrbuf;
1749
1750     if (log_level_details)
1751     {
1752         yaz_log(log_level_details, "extract_token_add "
1753                 "type=%c index=%s seqno=" ZINT_FORMAT " s=%.*s",
1754                 p->index_type, p->index_name, 
1755                 p->seqno, p->term_len, p->term_buf);
1756     }
1757     if ((wrbuf = zebra_replace(zh->reg->zebra_maps, p->index_type, 0,
1758                                p->term_buf, p->term_len)))
1759     {
1760         p->term_buf = wrbuf_buf(wrbuf);
1761         p->term_len = wrbuf_len(wrbuf);
1762     }
1763     if (zebra_maps_is_complete (zh->reg->zebra_maps, p->index_type))
1764         extract_add_complete_field (p);
1765     else
1766         extract_add_incomplete_field(p);
1767 }
1768
1769 static void extract_set_store_data_cb(struct recExtractCtrl *p,
1770                                       void *buf, size_t sz)
1771 {
1772     ZebraHandle zh = (ZebraHandle) p->handle;
1773
1774     xfree(zh->store_data_buf);
1775     zh->store_data_buf = 0;
1776     zh->store_data_size = 0;
1777     if (buf && sz)
1778     {
1779         zh->store_data_buf = xmalloc(sz);
1780         zh->store_data_size = sz;
1781         memcpy(zh->store_data_buf, buf, sz);
1782     }
1783 }
1784
1785 static void extract_set_store_data_prepare(struct recExtractCtrl *p)
1786 {
1787     ZebraHandle zh = (ZebraHandle) p->handle;
1788     xfree(zh->store_data_buf);
1789     zh->store_data_buf = 0;
1790     zh->store_data_size = 0;
1791     p->setStoreData = extract_set_store_data_cb;
1792 }
1793
1794 static void extract_schema_add(struct recExtractCtrl *p, Odr_oid *oid)
1795 {
1796     ZebraHandle zh = (ZebraHandle) p->handle;
1797     zebraExplain_addSchema (zh->reg->zei, oid);
1798 }
1799
1800 void extract_flush_sort_keys(ZebraHandle zh, zint sysno,
1801                              int cmd, zebra_rec_keys_t reckeys)
1802 {
1803 #if 0
1804     yaz_log(YLOG_LOG, "extract_flush_sort_keys cmd=%d sysno=" ZINT_FORMAT,
1805             cmd, sysno);
1806     extract_rec_keys_log(zh, cmd, reckeys, YLOG_LOG);
1807 #endif
1808
1809     if (zebra_rec_keys_rewind(reckeys))
1810     {
1811         zebra_sort_index_t si = zh->reg->sort_index;
1812         size_t slen;
1813         const char *str;
1814         struct it_key key_in;
1815
1816         zebra_sort_sysno(si, sysno);
1817
1818         while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
1819         {
1820             int ord = CAST_ZINT_TO_INT(key_in.mem[0]);
1821             
1822             zebra_sort_type(si, ord);
1823             if (cmd == 1)
1824                 zebra_sort_add(si, str, slen);
1825             else
1826                 zebra_sort_delete(si);
1827         }
1828     }
1829 }
1830
1831 /*
1832  * Local variables:
1833  * c-basic-offset: 4
1834  * indent-tabs-mode: nil
1835  * End:
1836  * vim: shiftwidth=4 tabstop=8 expandtab
1837  */
1838