Functional multi-value sort + tests
[idzebra-moved-to-github.git] / index / zsets.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1995-2008 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20
21 #include <stdio.h>
22 #include <assert.h>
23 #ifdef WIN32
24 #include <io.h>
25 #else
26 #include <unistd.h>
27 #endif
28
29 #include "index.h"
30 #include "rank.h"
31 #include <yaz/diagbib1.h>
32 #include <rset.h>
33
34 #define ZSET_SORT_MAX_LEVEL 10 /* size of the per-criterion arrays in resultSetSortSingle(); extra sort keys are ignored there */
35
36 struct zebra_set_term_entry { /* one term recorded in a set by resultSetAddTerm() */
37     int reg_type;     /* reg_type argument given to resultSetAddTerm() */
38     char *db;         /* copy of the db argument, allocated in the set's nmem */
39     char *index_name; /* copy of the index_name argument */
40     char *term;       /* copy of the term; 0 while the slot is unused */
41 };
42
43 struct zebra_set { /* one named result set; all sets of a handle are chained from zh->sets */
44     char *name; /* set name (xstrdup'ed in resultSetAdd) */
45     RSET rset; /* result of the search; 0 if absent or invalidated */
46     NMEM nmem; /* holds basenames, term entries and the copied sort spec */
47     NMEM rset_nmem; /* for creating the rsets in */
48     zint hits; /* hit count; resultSetAddTerm() also uses it as next free term_entries index */
49     int num_bases; /* number of elements in basenames */
50     const char **basenames; /* databases the set was searched in */
51     Z_RPNQuery *rpn; /* query that resultSetGet() re-runs when rset is gone */
52     Z_SortKeySpecList *sortSpec; /* copy of the spec last given to resultSetSort() */
53     struct zset_sort_info *sort_info; /* sorted/ranked head of the set */
54     struct zebra_set_term_entry *term_entries; /* terms added by resultSetAddTerm(); 0 if none */
55     int term_entries_max; /* capacity of term_entries */
56     struct zebra_set *next; /* next set in the zh->sets list */
57     int locked; /* set while resultSetAddRPN() searches; resultSetAdd() will not overwrite a locked set */
58     int estimated_hit_count; /* non-zero makes resultSetAddRPN() report the hit count as estimated */
59
60     zint cache_position;  /* last position */
61     RSFD cache_rfd;       /* rfd (NULL if not existing) */
62     zint cache_psysno;    /* sysno for last position */
63     zint approx_limit;    /* limit before we do approx */
64 };
65
66 struct zset_sort_entry { /* one record in the sorted/ranked part of a set */
67     zint sysno; /* system number of the record */
68     int score;  /* rank score; resultSetInsertSort() stores -1 here */
69 };
70
71 struct zset_sort_info { /* bounded, ordered list of the best entries of a set */
72     int max_entries; /* capacity; taken from the "sortmax" resource, at least 2 */
73     int num_entries; /* number of entries currently in use */
74     struct zset_sort_entry *all_entries; /* backing storage for max_entries entries */
75     struct zset_sort_entry **entries; /* pointers into all_entries, kept in sort order */
76 };
77
78 static int log_level_set=0; /* non-zero once loglevels() has filled in the levels below */
79 static int log_level_sort=0; /* yaz log level of module "sorting" */
80 static int log_level_searchhits=0; /* yaz log level of module "searchhits" */
81 static int log_level_searchterms=0; /* yaz log level of module "searchterms" */
82 static int log_level_resultsets=0; /* yaz log level of module "resultsets" */
83
84 static void loglevels(void)
85 {
86     if (log_level_set)
87         return;
88     log_level_sort = yaz_log_module_level("sorting");
89     log_level_searchhits = yaz_log_module_level("searchhits");
90     log_level_searchterms = yaz_log_module_level("searchterms");
91     log_level_resultsets = yaz_log_module_level("resultsets");
92     log_level_set = 1;
93 }
94
95
96 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
97                                  Z_RPNQuery *rpn, ZebraSet sset)
98 {
99     RSET rset = 0;
100     Z_SortKeySpecList *sort_sequence;
101     int sort_status, i;
102     ZEBRA_RES res = ZEBRA_OK;
103
104     sort_sequence = (Z_SortKeySpecList *)
105         nmem_malloc(nmem, sizeof(*sort_sequence));
106     sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
107     sort_sequence->specs = (Z_SortKeySpec **)
108         nmem_malloc(nmem, sort_sequence->num_specs *
109                     sizeof(*sort_sequence->specs));
110     for (i = 0; i<sort_sequence->num_specs; i++)
111         sort_sequence->specs[i] = 0;
112     
113     rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
114
115     res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
116                          nmem, rset_nmem,
117                          sort_sequence,
118                          sset->num_bases, sset->basenames,
119                          &rset);
120     if (res != ZEBRA_OK)
121     {
122         sset->rset = 0;
123         return res;
124     }
125     for (i = 0; sort_sequence->specs[i]; i++)
126         ;
127     sort_sequence->num_specs = i;
128     rset->hits_limit = sset->approx_limit;
129     if (!i)
130     {
131         res = resultSetRank(zh, sset, rset, rset_nmem);
132     }
133     else
134     {
135         res = resultSetSortSingle(zh, nmem, sset, rset,
136                                   sort_sequence, &sort_status);
137     }
138     sset->rset = rset;
139     return res;
140 }
141
142
143 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
144                           int num_bases, char **basenames,
145                           const char *setname,
146                           zint *hits, int *estimated_hit_count)
147 {
148     ZebraSet zebraSet;
149     int i;
150     ZEBRA_RES res;
151
152     *hits = 0;
153     *estimated_hit_count = 0;
154
155     zebraSet = resultSetAdd(zh, setname, 1);
156     if (!zebraSet)
157         return ZEBRA_FAIL;
158     zebraSet->locked = 1;
159     zebraSet->rpn = 0;
160     zebraSet->nmem = m;
161     zebraSet->rset_nmem = nmem_create(); 
162
163     zebraSet->num_bases = num_bases;
164     zebraSet->basenames = 
165         nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
166     for (i = 0; i<num_bases; i++)
167         zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
168
169     res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
170                           rpn, zebraSet);
171     *hits = zebraSet->hits;
172     if (zebraSet->estimated_hit_count)
173         *estimated_hit_count = 1;
174
175     if (zebraSet->rset)
176         zebraSet->rpn = rpn;
177     zebraSet->locked = 0;
178     if (!zebraSet->rset)
179         return ZEBRA_FAIL;
180     return res;
181 }
182
183 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
184                       const char *db, const char *index_name, 
185                       const char *term)
186 {
187     assert(zh); /* compiler shut up */
188     if (!s->nmem)
189         s->nmem = nmem_create();
190     if (!s->term_entries)
191     {
192         int i;
193         s->term_entries_max = 1000;
194         s->term_entries =
195             nmem_malloc(s->nmem, s->term_entries_max * 
196                         sizeof(*s->term_entries));
197         for (i = 0; i < s->term_entries_max; i++)
198             s->term_entries[i].term = 0;
199     }
200     if (s->hits < s->term_entries_max)
201     {
202         s->term_entries[s->hits].reg_type = reg_type;
203         s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
204         s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
205         s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
206     }
207     (s->hits)++;
208 }
209
210 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
211 {
212     ZebraSet s;
213     int i;
214
215     for (s = zh->sets; s; s = s->next)
216         if (!strcmp(s->name, name))
217             break;
218     
219     if (!log_level_set)
220         loglevels();
221     if (s)
222     {
223         yaz_log(log_level_resultsets, "updating result set %s", name);
224         if (!ov || s->locked)
225             return NULL;
226         if (s->rset)
227         {
228             if (s->cache_rfd)
229                 rset_close(s->cache_rfd);
230             rset_delete(s->rset);
231         }
232         if (s->rset_nmem)
233             nmem_destroy(s->rset_nmem);
234         if (s->nmem)
235             nmem_destroy(s->nmem);
236     }
237     else
238     {
239         const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
240
241         yaz_log(log_level_resultsets, "adding result set %s", name);
242         s = (ZebraSet) xmalloc(sizeof(*s));
243         s->next = zh->sets;
244         zh->sets = s;
245         s->name = xstrdup(name);
246
247         s->sort_info = (struct zset_sort_info *)
248             xmalloc(sizeof(*s->sort_info));
249         s->sort_info->max_entries = atoi(sort_max_str);
250         if (s->sort_info->max_entries < 2)
251             s->sort_info->max_entries = 2;
252
253         s->sort_info->entries = (struct zset_sort_entry **)
254             xmalloc(sizeof(*s->sort_info->entries) *
255                     s->sort_info->max_entries);
256         s->sort_info->all_entries = (struct zset_sort_entry *)
257             xmalloc(sizeof(*s->sort_info->all_entries) *
258                     s->sort_info->max_entries);
259         for (i = 0; i < s->sort_info->max_entries; i++)
260             s->sort_info->entries[i] = s->sort_info->all_entries + i;
261     }
262     s->locked = 0;
263     s->term_entries = 0;
264     s->hits = 0;
265     s->rset = 0;
266     s->rset_nmem = 0;
267     s->nmem = 0;
268     s->rpn = 0;
269     s->sortSpec = 0;
270     s->cache_position = 0;
271     s->cache_rfd = 0;
272     s->approx_limit = zh->approx_limit;
273     s->estimated_hit_count = 0;
274     return s;
275 }
276
277 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
278 {
279     ZebraSet s;
280
281     for (s = zh->sets; s; s = s->next)
282         if (!strcmp(s->name, name))
283         {
284             if (!s->term_entries && !s->rset && s->rpn)
285             {
286                 NMEM nmem = nmem_create();
287                 yaz_log(log_level_resultsets, "research %s", name);
288                 if (!s->rset_nmem)
289                     s->rset_nmem = nmem_create();
290                 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
291                 if (s->rset && s->sortSpec)
292                 {
293                     int sort_status;
294                     yaz_log(log_level_resultsets, "resort %s", name);
295                     resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
296                                         &sort_status);
297                 }
298                 nmem_destroy(nmem);
299             }
300             return s;
301         }
302     return NULL;
303 }
304
305 ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname,
306                                 const char ***basenames, int *num_bases)
307 {
308     ZebraSet sset = resultSetGet(zh, setname);
309     if (!sset)
310         return ZEBRA_FAIL;
311     *basenames = sset->basenames;
312     *num_bases = sset->num_bases;
313     return ZEBRA_OK;
314
315 }
316
317 void resultSetInvalidate(ZebraHandle zh)
318 {
319     ZebraSet s = zh->sets;
320     
321     yaz_log(log_level_resultsets, "invalidating result sets");
322     for (; s; s = s->next)
323     {
324         if (s->rset)
325         {
326             if (s->cache_rfd)
327                 rset_close(s->cache_rfd);
328             rset_delete(s->rset);
329         }
330         s->rset = 0;
331         s->cache_rfd = 0;
332         s->cache_position = 0;
333         if (s->rset_nmem)
334             nmem_destroy(s->rset_nmem);
335         s->rset_nmem=0;
336     }
337 }
338
339 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
340 {
341     ZebraSet * ss = &zh->sets;
342     int i;
343     
344     if (statuses)
345         for (i = 0; i<num; i++)
346             statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
347     while (*ss)
348     {
349         int i = -1;
350         ZebraSet s = *ss;
351         if (num >= 0)
352         {
353             for (i = 0; i<num; i++)
354                 if (!strcmp(s->name, names[i]))
355                 {
356                     if (statuses)
357                         statuses[i] = Z_DeleteStatus_success;
358                     i = -1;
359                     break;
360                 }
361         }
362         if (i < 0)
363         {
364             *ss = s->next;
365             
366             xfree(s->sort_info->all_entries);
367             xfree(s->sort_info->entries);
368             xfree(s->sort_info);
369             
370             if (s->nmem)
371                 nmem_destroy(s->nmem);
372             if (s->rset)
373             {
374                 if (s->cache_rfd)
375                     rset_close(s->cache_rfd);
376                 rset_delete(s->rset);
377             }
378             if (s->rset_nmem)
379                 nmem_destroy(s->rset_nmem);
380             xfree(s->name);
381             xfree(s);
382         }
383         else
384             ss = &s->next;
385     }
386 }
387
388 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
389                                                  const char *name, 
390                                                  zint start, int num)
391 {
392     zint pos_small[10];
393     zint *pos = pos_small;
394     ZebraMetaRecord *mr;
395     int i;
396
397     if (num > 10000 || num <= 0)
398         return 0;
399
400     if (num > 10)
401         pos = xmalloc(sizeof(*pos) * num);
402     
403     for (i = 0; i<num; i++)
404         pos[i] = start+i;
405
406     mr = zebra_meta_records_create(zh, name, num, pos);
407     
408     if (num > 10)
409         xfree(pos);
410     return mr;
411 }
412
413 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name, 
414                                            int num, zint *positions)
415 {
416     ZebraSet sset;
417     ZebraMetaRecord *sr = 0;
418     RSET rset;
419     int i;
420     struct zset_sort_info *sort_info;
421     size_t sysno_mem_index = 0;
422
423     if (zh->m_staticrank)
424         sysno_mem_index = 1;
425
426     if (!log_level_set)
427         loglevels();
428     if (!(sset = resultSetGet(zh, name)))
429         return NULL;
430     if (!(rset = sset->rset))
431     {
432         if (!sset->term_entries)
433             return 0;
434         sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
435         for (i = 0; i<num; i++)
436         {
437             sr[i].sysno = 0;
438             sr[i].score = -1;
439             sr[i].term = 0;
440             sr[i].db = 0;
441
442             if (positions[i] <= sset->term_entries_max)
443             {
444                 sr[i].term = sset->term_entries[positions[i]-1].term;
445                 sr[i].db = sset->term_entries[positions[i]-1].db;
446             }
447         }
448     }
449     else
450     {
451         sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
452         for (i = 0; i<num; i++)
453         {
454             sr[i].sysno = 0;
455             sr[i].score = -1;
456             sr[i].term = 0;
457             sr[i].db = 0;
458         }
459         sort_info = sset->sort_info;
460         if (sort_info)
461         {
462             zint position;
463             
464             for (i = 0; i<num; i++)
465             {
466                 position = positions[i];
467                 if (position > 0 && position <= sort_info->num_entries)
468                 {
469                     yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
470                             " (sorted)", position);
471                     sr[i].sysno = sort_info->entries[position-1]->sysno;
472                     sr[i].score = sort_info->entries[position-1]->score;
473                 }
474             }
475         }
476         /* did we really get all entries using sort ? */
477         for (i = 0; i<num; i++)
478         {
479             if (!sr[i].sysno)
480                 break;
481         }
482         if (i < num) /* nope, get the rest, unsorted - sorry */
483         {
484             zint position = 0;
485             int num_i = 0;
486             zint psysno = 0;
487             RSFD rfd;
488             struct it_key key;
489             
490             if (sort_info)
491                 position = sort_info->num_entries;
492             while (num_i < num && positions[num_i] <= position)
493                 num_i++;
494             
495             if (sset->cache_rfd &&
496                 num_i < num && positions[num_i] > sset->cache_position)
497             {
498                 position = sset->cache_position;
499                 rfd = sset->cache_rfd;
500                 psysno = sset->cache_psysno;
501             }
502             else
503             {
504                 if (sset->cache_rfd)
505                     rset_close(sset->cache_rfd);
506                 rfd = rset_open(rset, RSETF_READ);
507             }
508             while (num_i < num && rset_read(rfd, &key, 0))
509             {
510                 zint this_sys = key.mem[sysno_mem_index];
511                 if (this_sys != psysno)
512                 {
513                     psysno = this_sys;
514                     if (sort_info)
515                     {
516                         /* determine we alreay have this in our set */
517                         for (i = sort_info->num_entries; --i >= 0; )
518                             if (psysno == sort_info->entries[i]->sysno)
519                                 break;
520                         if (i >= 0)
521                             continue;
522                     }
523                     position++;
524                     assert(num_i < num);
525                     if (position == positions[num_i])
526                     {
527                         sr[num_i].sysno = psysno;
528                         yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
529                         sr[num_i].score = -1;
530                         num_i++;
531                     }
532                 }
533             }
534             sset->cache_position = position;
535             sset->cache_psysno = psysno;
536             sset->cache_rfd = rfd;
537         }
538     }
539     return sr;
540 }
541
542 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
543                                 int num)
544 {
545     assert(zh); /* compiler shut up about unused arg */
546     xfree(records);
547 }
548
549 struct sortKeyInfo { /* one sort criterion as used by resultSetInsertSort() */
550     int relation; /* 'A' for ascending, 'D' for descending */
551     int *ord; /* array of ord for each database searched; -1 means no sort index to read */
552     int *numerical; /* per database searched: non-zero selects numeric instead of byte-wise comparison */
553     const char *index_type; /* index type passed to zebra_term_untrans() for numeric comparison */
554 };
555
556 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
557                          int database_no,
558                          struct sortKeyInfo *criteria, int num_criteria,
559                          zint sysno,
560                          char *cmp_buf[], char *tmp_cmp_buf[])
561 {
562     struct zset_sort_entry *new_entry = NULL;
563     struct zset_sort_info *sort_info = sset->sort_info;
564     int i, j;
565     WRBUF w = wrbuf_alloc();
566
567     zebra_sort_sysno(zh->reg->sort_index, sysno);
568     for (i = 0; i<num_criteria; i++)
569     {
570         char *this_entry_buf = tmp_cmp_buf[i];
571         memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
572         
573         if (criteria[i].ord[database_no] != -1)
574         {
575             yaz_log(log_level_sort, "pre zebra_sort_type ord is %d",
576                     criteria[i].ord[database_no]);
577             zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]);
578             wrbuf_rewind(w);
579             if (zebra_sort_read(zh->reg->sort_index, w))
580             {
581                 int off = 0;
582                 while (off != wrbuf_len(w))
583                 {
584                     assert(off < wrbuf_len(w));
585                     if (off == 0)
586                         strcpy(this_entry_buf, wrbuf_buf(w));
587                     else if (criteria[i].relation == 'A')
588                     {
589                         if (strcmp(wrbuf_buf(w)+off, this_entry_buf) < 0)
590                             strcpy(this_entry_buf, wrbuf_buf(w)+off);
591                     }
592                     else if (criteria[i].relation == 'D')
593                     {
594                         if (strcmp(wrbuf_buf(w)+off, this_entry_buf) > 0)
595                             strcpy(this_entry_buf, wrbuf_buf(w)+off);
596                     }
597                     off += 1 + strlen(wrbuf_buf(w)+off);
598                 }
599             }
600         }
601         else
602         {
603             yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
604         }
605     }
606     wrbuf_destroy(w);
607     i = sort_info->num_entries;
608     while (--i >= 0)
609     {
610         int rel = 0;
611         for (j = 0; j<num_criteria; j++)
612         {
613             char *this_entry_buf = tmp_cmp_buf[j];
614             char *other_entry_buf = 
615                 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
616             if (criteria[j].numerical[database_no])
617             {
618                 char this_entry_org[1024];
619                 char other_entry_org[1024];
620                 double diff;
621                 /* when searching multiple databases, we use the index
622                    type of the first one. So if they differ between
623                    databases, we have a problem here we could store the
624                    index_type for each database, but if we didn't find the
625                    record in any sort index, then we still don't know to
626                    which database it belongs. */
627                 const char *index_type = criteria[j].index_type;
628                 zebra_term_untrans(zh, index_type, this_entry_org,
629                                    this_entry_buf);
630                 zebra_term_untrans(zh, index_type, other_entry_org,
631                                    other_entry_buf);
632                 diff = atof(this_entry_org) - atof(other_entry_org);
633                 
634                 if (diff > 0.0)
635                     rel = 1;
636                 else if (diff < 0.0)
637                     rel = -1;
638                 else
639                     rel = 0;
640             }
641             else
642             {
643                 rel = memcmp(this_entry_buf, other_entry_buf,
644                              SORT_IDX_ENTRYSIZE);
645             }
646             /* when the compare is equal, continue to next criteria, 
647                else break out */
648             if (rel)
649                 break;
650         }       
651         if (!rel)
652             break;
653         if (criteria[j].relation == 'A')
654         {
655             if (rel > 0)
656                 break;
657         }
658         else if (criteria[j].relation == 'D')
659         {
660             if (rel < 0)
661                 break;
662         }
663     }
664     ++i;
665     yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
666     j = sort_info->max_entries;
667     if (i == j){
668         yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
669         return;
670     }
671
672     if (sort_info->num_entries == j)
673         --j;
674     else
675         j = (sort_info->num_entries)++;
676     new_entry = sort_info->entries[j];
677     /* move up all higher entries (to make room) */
678     while (j != i)
679     {
680         int k;
681         for (k = 0; k<num_criteria; k++)
682         {
683             char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
684             char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
685             memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
686         }
687         sort_info->entries[j] = sort_info->entries[j-1];
688         --j;
689     }
690     /* and insert the new entry at the correct place */
691     sort_info->entries[i] = new_entry;
692     assert(new_entry);
693     /* and add this to the compare buffer */
694     for (i = 0; i<num_criteria; i++)
695     {
696         char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
697         char *this_entry_buf = tmp_cmp_buf[i];
698         memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
699     }
700     new_entry->sysno = sysno;
701     new_entry->score = -1;
702 }
703
704 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
705                          zint sysno, int score, int relation)
706 {
707     struct zset_sort_entry *new_entry = NULL;
708     int i, j;
709     assert(zh); /* compiler shut up about unused arg */
710
711     i = sort_info->num_entries;
712     while (--i >= 0)
713     {
714         int rel = 0;
715
716         rel = score - sort_info->entries[i]->score;
717
718         if (relation == 'D')
719         {
720             if (rel >= 0)
721                 break;
722         }
723         else if (relation == 'A')
724         {
725             if (rel <= 0)
726                 break;
727         }
728     }
729     ++i;
730     j = sort_info->max_entries;
731     if (i == j)
732         return;
733
734     if (sort_info->num_entries == j)
735         --j;
736     else
737         j = (sort_info->num_entries)++;
738     
739     new_entry = sort_info->entries[j];
740     while (j != i)
741     {
742         sort_info->entries[j] = sort_info->entries[j-1];
743         --j;
744     }
745     sort_info->entries[i] = new_entry;
746     assert(new_entry);
747     new_entry->sysno = sysno;
748     new_entry->score = score;
749 }
750
751 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
752 {
753     Z_RPNQuery *dst = 0;
754     ODR encode = odr_createmem(ODR_ENCODE);
755     ODR decode = odr_createmem(ODR_DECODE);
756
757     if (z_RPNQuery(encode, &src, 0, 0))
758     {
759         int len;
760         char *buf = odr_getbuf(encode, &len, 0);
761
762         if (buf)
763         {
764             odr_setbuf(decode, buf, len, 0);
765             z_RPNQuery(decode, &dst, 0, 0);
766         }
767     }
768     nmem_transfer(nmem, decode->mem);
769     odr_destroy(encode);
770     odr_destroy(decode);
771     return dst;
772 }
773
774 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
775 {
776     Z_SortKeySpecList *dst = 0;
777     ODR encode = odr_createmem(ODR_ENCODE);
778     ODR decode = odr_createmem(ODR_DECODE);
779
780     if (z_SortKeySpecList(encode, &src, 0, 0))
781     {
782         int len;
783         char *buf = odr_getbuf(encode, &len, 0);
784
785         if (buf)
786         {
787             odr_setbuf(decode, buf, len, 0);
788             z_SortKeySpecList(decode, &dst, 0, 0);
789         }
790     }
791     nmem_transfer(nmem, decode->mem);
792     odr_destroy(encode);
793     odr_destroy(decode);
794     return dst;
795 }
796
797 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
798                         ZebraSet rset)
799 {
800     ZebraSet nset;
801     int i;
802
803     nset = resultSetAdd(zh, setname, 1);
804     if (!nset)
805         return 0;
806
807     nset->nmem = nmem_create();
808
809     nset->num_bases = rset->num_bases;
810     nset->basenames = 
811         nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
812     for (i = 0; i<rset->num_bases; i++)
813         nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
814
815     if (rset->rset)
816         nset->rset = rset_dup(rset->rset);
817     if (rset->rpn)
818         nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
819     return nset;
820 }
821
822 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
823                         int num_input_setnames, const char **input_setnames,
824                         const char *output_setname,
825                         Z_SortKeySpecList *sort_sequence, int *sort_status)
826 {
827     ZebraSet sset;
828     RSET rset;
829
830     if (num_input_setnames == 0)
831     {
832         zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
833         return ZEBRA_FAIL;
834     }
835     if (num_input_setnames > 1)
836     {
837         zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
838         return ZEBRA_FAIL;
839     }
840     if (!log_level_set)
841         loglevels();
842     yaz_log(log_level_sort, "result set sort input=%s output=%s",
843             *input_setnames, output_setname);
844     sset = resultSetGet(zh, input_setnames[0]);
845     if (!sset)
846     {
847         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
848                        input_setnames[0]);
849         return ZEBRA_FAIL;
850     }
851     if (!(rset = sset->rset))
852     {
853         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
854                        input_setnames[0]);
855         return ZEBRA_FAIL;
856     }
857     if (strcmp(output_setname, input_setnames[0]))
858         sset = resultSetClone(zh, output_setname, sset);
859     sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
860     return resultSetSortSingle(zh, nmem, sset, rset, sort_sequence,
861                                sort_status);
862 }
863
864 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
865                               ZebraSet sset, RSET rset,
866                               Z_SortKeySpecList *sort_sequence,
867                               int *sort_status)
868 {
869     int i;
870     int ib;
871     int n = 0;
872     zint kno = 0;
873     zint psysno = 0;
874     struct it_key key;
875     struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
876     char *cmp_buf[ZSET_SORT_MAX_LEVEL];
877     char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
878     int num_criteria;
879     RSFD rfd;
880     TERMID termid;
881     TERMID *terms;
882     int numTerms = 0;
883     size_t sysno_mem_index = 0;
884     
885     int numbases = zh->num_basenames;
886     yaz_log(log_level_sort, "searching %d databases",numbases);
887
888     if (zh->m_staticrank)
889         sysno_mem_index = 1;
890
891     assert(nmem); /* compiler shut up about unused param */
892     sset->sort_info->num_entries = 0;
893
894     rset_getterms(rset, 0, 0, &n);
895     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
896     rset_getterms(rset, terms, n, &numTerms);
897
898     sset->hits = 0;
899     num_criteria = sort_sequence->num_specs;
900     if (num_criteria > ZSET_SORT_MAX_LEVEL)
901         num_criteria = ZSET_SORT_MAX_LEVEL;
902     /* set up the search criteria */
903     for (i = 0; i < num_criteria; i++)
904     {
905         Z_SortKeySpec *sks = sort_sequence->specs[i];
906         Z_SortKey *sk;
907         ZEBRA_RES res;
908         
909         sort_criteria[i].ord = (int *)
910             nmem_malloc(nmem, sizeof(int)*numbases);
911         sort_criteria[i].numerical = (int *)
912             nmem_malloc(nmem, sizeof(int)*numbases);
913         
914         /* initialize ord and numerical for each database */
915         for (ib = 0; ib < numbases; ib++)
916         {
917             sort_criteria[i].ord[ib] = -1;
918             sort_criteria[i].numerical[ib] = 0;
919         }
920
921         if (sks->which == Z_SortKeySpec_missingValueData)
922         {
923             zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
924             return ZEBRA_FAIL;
925         }
926         if (*sks->sortRelation == Z_SortKeySpec_ascending)
927             sort_criteria[i].relation = 'A';
928         else if (*sks->sortRelation == Z_SortKeySpec_descending)
929             sort_criteria[i].relation = 'D';
930         else
931         {
932             zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
933             return ZEBRA_FAIL;
934         }
935         if (sks->sortElement->which == Z_SortElement_databaseSpecific)
936         {
937             zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
938             return ZEBRA_FAIL;
939         }
940         else if (sks->sortElement->which != Z_SortElement_generic)
941         {
942             zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
943             return ZEBRA_FAIL;
944         }       
945         sk = sks->sortElement->u.generic;
946         switch (sk->which)
947         {
948         case Z_SortKey_sortField:
949             yaz_log(log_level_sort, "key %d is of type sortField", i+1);
950             for (ib = 0; ib < numbases; ib++)
951             {
952                 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
953                 sort_criteria[i].numerical[ib] = 0;
954                 sort_criteria[i].ord[ib] = 
955                     zebraExplain_lookup_attr_str(zh->reg->zei,
956                                                  zinfo_index_category_sort,
957                                                  0, sk->u.sortField);
958                 if (sks->which != Z_SortKeySpec_null
959                     && sort_criteria[i].ord[ib] == -1)
960                 {
961                     zebra_setError(zh,
962                                    YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
963                     return ZEBRA_FAIL;
964                 }
965             }
966             break;
967         case Z_SortKey_elementSpec:
968             yaz_log(log_level_sort, "key %d is of type elementSpec", i+1);
969             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
970             return ZEBRA_FAIL;
971         case Z_SortKey_sortAttributes:
972             yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
973             /* for every database we searched, get the sort index file
974                id (ord) and its numerical indication and store them in
975                the sort_criteria */
976             for (ib = 0; ib < numbases; ib++)
977             {
978                 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
979                 res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
980                                          &sort_criteria[i].ord[ib],
981                                          &sort_criteria[i].numerical[ib]);
982             }
983             
984             if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
985                 return ZEBRA_FAIL;
986             break;
987         }
988         /* right now we look up the index type based on the first database
989            if the index_type's can differ between the indexes of different
990            databases (which i guess they can?) then we have to store the
991            index types for each database, just like the ord and numerical */
992         if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
993                                     &sort_criteria[i].index_type,
994                                     0, 0))
995         {
996             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
997             return ZEBRA_FAIL;
998         }
999     }
1000     /* allocate space for each cmpare buf + one extra for tmp comparison */
1001     /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
1002        all other result entries to compare against. This is slowly filled when records are processed.
1003        tmp_cmp_buf is an array with a value of the current record for each criteria
1004     */
1005     for (i = 0; i<num_criteria; i++)
1006     {
1007         cmp_buf[i] = xmalloc(sset->sort_info->max_entries
1008                              * SORT_IDX_ENTRYSIZE);
1009         tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
1010     }
1011     rfd = rset_open(rset, RSETF_READ);
1012     while (rset_read(rfd, &key, &termid))
1013     {
1014         zint this_sys = key.mem[sysno_mem_index];
1015         if (log_level_searchhits)
1016             key_logdump_txt(log_level_searchhits, &key, termid->name);
1017         kno++;
1018         if (this_sys != psysno)
1019         {
1020             int database_no = 0;
1021             if ((sset->hits & 255) == 0 && zh->break_handler_func)
1022             {
1023                 if (zh->break_handler_func(zh->break_handler_data))
1024                 {
1025                     rset_set_hits_limit(rset, 0);
1026                     break;
1027                 }
1028             }
1029             (sset->hits)++;
1030             psysno = this_sys;
1031
1032             /* determine database from the term, but only bother if more than
1033                one database is in use*/
1034             if (numbases > 1 && termid->ol)
1035             {
1036                 const char *this_db = 0;
1037                 if (zebraExplain_lookup_ord(zh->reg->zei, termid->ol->ord,  0, &this_db, 0)
1038                     == 0 && this_db)
1039                 {
1040                     for (ib = 0; ib < numbases; ib++)
1041                         if (!strcmp(this_db, zh->basenames[ib]))
1042                             database_no = ib;
1043                 }
1044             }
1045 #if 0
1046             yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " database_no=%d", this_sys,
1047                 database_no);
1048             ord_list_print(termid->ol);
1049 #endif
1050             resultSetInsertSort(zh, sset, database_no,
1051                                 sort_criteria, num_criteria, psysno, cmp_buf,
1052                                 tmp_cmp_buf);
1053         }
1054     }
1055     rset_close(rfd);
1056
1057     /* free the compare buffers */
1058     for (i = 0; i<num_criteria; i++)
1059     {
1060         xfree(cmp_buf[i]);
1061         xfree(tmp_cmp_buf[i]);
1062     }
1063
1064     yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
1065             kno, sset->hits);   
1066     for (i = 0; i < numTerms; i++)
1067         yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1068                 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1069     *sort_status = Z_SortResponse_success;
1070     return ZEBRA_OK;
1071 }
1072
1073 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1074 {
1075     ZebraSet s;
1076
1077     if ((s = resultSetGet(zh, resultSetId)))
1078         return s->rset;
1079     return NULL;
1080 }
1081
1082 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
1083                         RSET rset, NMEM nmem)
1084 {
1085     struct it_key key;
1086     TERMID termid;
1087     TERMID *terms;
1088     zint kno = 0;
1089     int numTerms = 0;
1090     int n = 0;
1091     int i;
1092     ZebraRankClass rank_class;
1093     struct zset_sort_info *sort_info;
1094     const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
1095     size_t sysno_mem_index = 0;
1096
1097     if (zh->m_staticrank)
1098         sysno_mem_index = 1;
1099
1100     if (!log_level_set)
1101         loglevels();
1102     sort_info = zebraSet->sort_info;
1103     sort_info->num_entries = 0;
1104     zebraSet->hits = 0;
1105     zebraSet->estimated_hit_count = 0;
1106     rset_getterms(rset, 0, 0, &n);
1107     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1108     rset_getterms(rset, terms, n, &numTerms);
1109
1110     rank_class = zebraRankLookup(zh, rank_handler_name);
1111     if (!rank_class)
1112     {
1113         yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1114         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1115         return ZEBRA_FAIL;
1116     }
1117     else
1118     {
1119         RSFD rfd = rset_open(rset, RSETF_READ);
1120         struct rank_control *rc = rank_class->control;
1121         int score;
1122         zint count = 0;
1123         void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1124                                      nmem, terms, numTerms);
1125         zint psysno = 0;  /* previous doc id / sys no */
1126         zint pstaticrank = 0; /* previous static rank */
1127         int stop_flag = 0;
1128         while (rset_read(rfd, &key, &termid))
1129         {
1130             zint this_sys = key.mem[sysno_mem_index];
1131
1132             zint seqno = key.mem[key.len-1];
1133             kno++;
1134             if (log_level_searchhits)
1135                 key_logdump_txt(log_level_searchhits, &key, termid->name);
1136             if (this_sys != psysno) 
1137             {   /* new record .. */
1138                 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1139                 {
1140                     if (zh->break_handler_func(zh->break_handler_data))
1141                     {
1142                         yaz_log(YLOG_LOG, "Aborted search");
1143                         stop_flag = 1;
1144                     }
1145                 }
1146                 if (rfd->counted_items > rset->hits_limit)
1147                     stop_flag = 1;
1148                 if (psysno)
1149                 {   /* only if we did have a previous record */
1150                     score = (*rc->calc)(handle, psysno, pstaticrank,
1151                                         &stop_flag);
1152                     /* insert the hit. A=Ascending */
1153                     resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1154                     count++;
1155                 }
1156                 if (stop_flag)
1157                 {
1158                     zebraSet->estimated_hit_count = 1;
1159                     rset_set_hits_limit(rset, 0);
1160                     break;
1161                 }
1162                 psysno = this_sys;
1163                 if (zh->m_staticrank)
1164                     pstaticrank = key.mem[0];
1165             }
1166             (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1167         }
1168         /* no more items */
1169         if (psysno)
1170         {   /* we had - at least - one record */
1171             score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1172             /* insert the hit. A=Ascending */
1173             resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1174             count++;
1175         }
1176         (*rc->end)(zh->reg, handle);
1177         rset_close(rfd);
1178     }
1179     zebraSet->hits = rset->hits_count;
1180
1181     yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1182             ZINT_FORMAT " sysnos, rank",  kno, zebraSet->hits);
1183     for (i = 0; i < numTerms; i++)
1184     {
1185         yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1186                 ZINT_FORMAT,
1187                 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1188     }
1189     return ZEBRA_OK;
1190 }
1191
1192 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1193 {
1194     ZebraRankClass p = zh->reg->rank_classes;
1195     while (p && strcmp(p->control->name, name))
1196         p = p->next;
1197     if (p && !p->init_flag)
1198     {
1199         if (p->control->create)
1200             p->class_handle = (*p->control->create)(zh);
1201         p->init_flag = 1;
1202     }
1203     return p;
1204 }
1205
1206 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1207 {
1208     ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1209     p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1210     memcpy(p->control, ctrl, sizeof(*p->control));
1211     p->control->name = xstrdup(ctrl->name);
1212     p->init_flag = 0;
1213     p->next = reg->rank_classes;
1214     reg->rank_classes = p;
1215 }
1216
1217 void zebraRankDestroy(struct zebra_register *reg)
1218 {
1219     ZebraRankClass p = reg->rank_classes;
1220     while (p)
1221     {
1222         ZebraRankClass p_next = p->next;
1223         if (p->init_flag && p->control->destroy)
1224             (*p->control->destroy)(reg, p->class_handle);
1225         xfree(p->control->name);
1226         xfree(p->control);
1227         xfree(p);
1228         p = p_next;
1229     }
1230     reg->rank_classes = NULL;
1231 }
1232
1233 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1234                                  zint *hits_array, int *approx_array)
1235 {
1236     int no = 0;
1237     int i;
1238     for (i = 0; i<rset->no_children; i++)
1239         no += trav_rset_for_termids(rset->children[i],
1240                                     (termid_array ? termid_array + no : 0),
1241                                     (hits_array ? hits_array + no : 0),
1242                                     (approx_array ? approx_array + no : 0));
1243     if (rset->term)
1244     {
1245         if (termid_array)
1246             termid_array[no] = rset->term;
1247         if (hits_array)
1248             hits_array[no] = rset->hits_count;
1249         if (approx_array)
1250             approx_array[no] = rset->hits_approx;
1251 #if 0
1252         yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1253                 " count=" ZINT_FORMAT,
1254                 rset, rset->term->name, rset->hits_limit, rset->hits_count);
1255 #endif
1256         no++;
1257     }
1258     return no;
1259 }
1260
1261 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1262                                    int *num_terms)
1263 {
1264     ZebraSet sset = resultSetGet(zh, setname);
1265     *num_terms = 0;
1266     if (sset)
1267     {
1268         *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1269         return ZEBRA_OK;
1270     }
1271     return ZEBRA_FAIL;
1272 }
1273
1274 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1275                                      int no, zint *count, int *approx,
1276                                      char *termbuf, size_t *termlen,
1277                                      const char **term_ref_id)
1278 {
1279     ZebraSet sset = resultSetGet(zh, setname);
1280     if (sset)
1281     {
1282         int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1283         if (no >= 0 && no < num_terms)
1284         {
1285             TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1286             zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1287             int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1288             
1289             trav_rset_for_termids(sset->rset, term_array,
1290                                   hits_array, approx_array);
1291
1292             if (count)
1293                 *count = hits_array[no];
1294             if (approx)
1295                 *approx = approx_array[no];
1296             if (termbuf)
1297             {
1298                 char *inbuf = term_array[no]->name;
1299                 size_t inleft = strlen(inbuf);
1300                 size_t outleft = *termlen - 1;
1301
1302                 if (zh->iconv_from_utf8 != 0)
1303                 {
1304                     char *outbuf = termbuf;
1305                     size_t ret;
1306                     
1307                     ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1308                                     &outbuf, &outleft);
1309                     if (ret == (size_t)(-1))
1310                         *termlen = 0;
1311                     else
1312                     {
1313                         yaz_iconv(zh->iconv_from_utf8, 0, 0, 
1314                                   &outbuf, &outleft);
1315                         *termlen = outbuf - termbuf;
1316                     }
1317                 }
1318                 else
1319                 {
1320                     if (inleft > outleft)
1321                         inleft = outleft;
1322                     *termlen = inleft;
1323                     memcpy(termbuf, inbuf, *termlen);
1324                 }
1325                 termbuf[*termlen] = '\0';
1326             }
1327             if (term_ref_id)
1328                 *term_ref_id = term_array[no]->ref_id;
1329
1330             xfree(term_array);
1331             xfree(hits_array);
1332             xfree(approx_array);
1333             return ZEBRA_OK;
1334         }
1335     }
1336     return ZEBRA_FAIL;
1337 }
1338
1339 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1340                                     zint sysno, zebra_snippets *snippets)
1341 {
1342     ZebraSet sset = resultSetGet(zh, setname);
1343     yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1344             setname, sysno);
1345     if (!sset)
1346         return ZEBRA_FAIL;
1347     else
1348     {
1349         struct rset_key_control *kc = zebra_key_control_create(zh);
1350         NMEM nmem = nmem_create();
1351         struct it_key key;
1352         RSET rsets[2], rset_comb;
1353         RSET rset_temp = rset_create_temp(nmem, kc, kc->scope, 
1354                                           res_get(zh->res, "setTmpDir"),0 );
1355         
1356         TERMID termid;
1357         RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1358         
1359         key.mem[0] = sysno;
1360         key.mem[1] = 0;
1361         key.mem[2] = 0;
1362         key.mem[3] = 0;
1363         key.len = 2;
1364         rset_write(rsfd, &key);
1365         rset_close(rsfd);
1366
1367         rsets[0] = rset_temp;
1368         rsets[1] = rset_dup(sset->rset);
1369         
1370         rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1371
1372         rsfd = rset_open(rset_comb, RSETF_READ);
1373
1374         while (rset_read(rsfd, &key, &termid))
1375         {
1376             if (termid)
1377             {
1378                 struct ord_list *ol;
1379                 for (ol = termid->ol; ol; ol = ol->next)
1380                 {
1381                     zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1382                                           ol->ord, termid->name);
1383                 }
1384             }
1385         }
1386         rset_close(rsfd);
1387         
1388         rset_delete(rset_comb);
1389         nmem_destroy(nmem);
1390         kc->dec(kc);
1391     }
1392     return ZEBRA_OK;
1393 }
1394
1395 static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh, 
1396                                       const char **basenames, int num_bases,
1397                                       zint recid,
1398                                       zint *sysnos, int *no_sysnos)
1399 {
1400     ZEBRA_RES res = ZEBRA_OK;
1401     int sysnos_offset = 0;
1402     int i;
1403     
1404     if (!zh->reg->isamb || !zh->m_segment_indexing)
1405     {
1406         if (sysnos_offset < *no_sysnos)
1407             *sysnos = recid;
1408         sysnos_offset++;
1409     }
1410     else
1411     {
1412         for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1413         {
1414             const char *database = basenames[i];
1415             if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1416             {
1417                 const char *index_type = "w";
1418                 const char *use_string = "_ALLRECORDS";
1419                 int ord;
1420                 zinfo_index_category_t cat = zinfo_index_category_alwaysmatches;
1421                 ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
1422                                                    index_type, use_string);
1423                 if (ord != -1)
1424                 {
1425                     char ord_buf[32];
1426                     int ord_len = key_SU_encode(ord, ord_buf);
1427                     char *info;
1428                 
1429                     ord_buf[ord_len] = '\0';
1430                 
1431                     info = dict_lookup(zh->reg->dict, ord_buf);
1432                     if (info)
1433                     {
1434                         if (*info != sizeof(ISAM_P))
1435                         {
1436                             res = ZEBRA_FAIL;
1437                         }
1438                         else
1439                         {
1440                             ISAM_P isam_p;
1441                             ISAMB_PP pt;
1442                             struct it_key key_until, key_found;
1443                             int i = 0;
1444                             int r;
1445                         
1446                             memcpy(&isam_p, info+1, sizeof(ISAM_P));
1447                         
1448                             pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
1449                             if (!pt)
1450                                 res = ZEBRA_FAIL;
1451                             else
1452                             {
1453                                 key_until.mem[i++] = recid;
1454                                 key_until.mem[i++] = 0;  /* section_id */
1455                                 if (zh->m_segment_indexing)
1456                                     key_until.mem[i++] = 0; /* segment */
1457                                 key_until.mem[i++] = 0;
1458                                 key_until.len = i;
1459                             
1460                                 r = isamb_pp_forward(pt, &key_found, &key_until);
1461                                 while (r && key_found.mem[0] == recid)
1462                                 {
1463                                     if (sysnos_offset < *no_sysnos)
1464                                         sysnos[sysnos_offset++] = 
1465                                             key_found.mem[key_found.len-1];
1466                                     r = isamb_pp_read(pt, &key_found);
1467                                 }
1468                                 isamb_pp_close(pt);
1469                             }
1470                         }
1471                     }
1472                 }
1473             }
1474         }
1475     }
1476     *no_sysnos = sysnos_offset;
1477     return res;
1478 }
1479
1480 ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh, 
1481                                       const char *setname,
1482                                       zint recid,
1483                                       zint *sysnos, int *no_sysnos)
1484 {
1485     const char **basenames;
1486     int num_bases;
1487     ZEBRA_RES res;
1488
1489     res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1490     if (res != ZEBRA_OK)
1491         return ZEBRA_FAIL;
1492
1493     return zebra_recid_to_sysno(zh, basenames, num_bases,
1494                                 recid, sysnos, no_sysnos);
1495 }
1496
1497 void zebra_count_set(ZebraHandle zh, RSET rset, zint *count,
1498                      zint approx_limit)
1499 {
1500     zint psysno = 0;
1501     struct it_key key;
1502     RSFD rfd;
1503
1504     yaz_log(YLOG_DEBUG, "count_set");
1505
1506     rset->hits_limit = approx_limit;
1507
1508     *count = 0;
1509     rfd = rset_open(rset, RSETF_READ);
1510     while (rset_read(rfd, &key,0 /* never mind terms */))
1511     {
1512         if (key.mem[0] != psysno)
1513         {
1514             psysno = key.mem[0];
1515             if (rfd->counted_items >= rset->hits_limit)
1516                 break;
1517         }
1518     }
1519     rset_close(rfd);
1520     *count = rset->hits_count;
1521 }
1522                    
1523
1524 /*
1525  * Local variables:
1526  * c-basic-offset: 4
1527  * indent-tabs-mode: nil
1528  * End:
1529  * vim: shiftwidth=4 tabstop=8 expandtab
1530  */
1531