Make section_id part of multi-value sort (one sort chunk / section).
[idzebra-moved-to-github.git] / index / zsets.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1995-2008 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20
21 #include <stdio.h>
22 #include <assert.h>
23 #ifdef WIN32
24 #include <io.h>
25 #else
26 #include <unistd.h>
27 #endif
28
29 #include "index.h"
30 #include "rank.h"
31 #include <yaz/diagbib1.h>
32 #include <rset.h>
33
34 #define ZSET_SORT_MAX_LEVEL 10
35
/* One term remembered for a term-based result set (see resultSetAddTerm). */
struct zebra_set_term_entry {
    int reg_type;       /* reg_type value passed to resultSetAddTerm */
    char *db;           /* database name, copied into the set's nmem */
    char *index_name;   /* index name, copied into the set's nmem */
    char *term;         /* term string; 0 for a slot that was never filled */
};
42
43 struct zebra_set {
44     char *name;
45     RSET rset;
46     NMEM nmem;
47     NMEM rset_nmem; /* for creating the rsets in */
48     zint hits;
49     int num_bases;
50     const char **basenames;
51     Z_RPNQuery *rpn;
52     Z_SortKeySpecList *sortSpec;
53     struct zset_sort_info *sort_info;
54     struct zebra_set_term_entry *term_entries;
55     int term_entries_max;
56     struct zebra_set *next;
57     int locked;
58     int estimated_hit_count;
59
60     zint cache_position;  /* last position */
61     RSFD cache_rfd;       /* rfd (NULL if not existing) */
62     zint cache_psysno;    /* sysno for last position */
63     zint approx_limit;    /* limit before we do approx */
64 };
65
66 struct zset_sort_entry {
67     zint sysno;
68     int score;
69 };
70
/* Bounded, ordered list of the best records of a result set. */
struct zset_sort_info {
    int max_entries;                    /* capacity of both arrays below */
    int num_entries;                    /* entries currently in use */
    struct zset_sort_entry *all_entries; /* backing storage for entries */
    struct zset_sort_entry **entries;   /* pointers into all_entries, kept
                                           in sorted order */
};
77
/* Module log levels. log_level_set tells whether they have been resolved. */
static int log_level_set = 0;
static int log_level_sort = 0;
static int log_level_searchhits = 0;
static int log_level_searchterms = 0;
static int log_level_resultsets = 0;

/* Resolve the module log levels from YAZ the first time it is called;
   later calls do nothing. */
static void loglevels(void)
{
    if (!log_level_set)
    {
        log_level_sort = yaz_log_module_level("sorting");
        log_level_searchhits = yaz_log_module_level("searchhits");
        log_level_searchterms = yaz_log_module_level("searchterms");
        log_level_resultsets = yaz_log_module_level("resultsets");
        log_level_set = 1;
    }
}
94
95
96 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
97                                  Z_RPNQuery *rpn, ZebraSet sset)
98 {
99     RSET rset = 0;
100     Z_SortKeySpecList *sort_sequence;
101     int sort_status, i;
102     ZEBRA_RES res = ZEBRA_OK;
103
104     sort_sequence = (Z_SortKeySpecList *)
105         nmem_malloc(nmem, sizeof(*sort_sequence));
106     sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
107     sort_sequence->specs = (Z_SortKeySpec **)
108         nmem_malloc(nmem, sort_sequence->num_specs *
109                     sizeof(*sort_sequence->specs));
110     for (i = 0; i<sort_sequence->num_specs; i++)
111         sort_sequence->specs[i] = 0;
112     
113     rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
114
115     res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
116                          nmem, rset_nmem,
117                          sort_sequence,
118                          sset->num_bases, sset->basenames,
119                          &rset);
120     if (res != ZEBRA_OK)
121     {
122         sset->rset = 0;
123         return res;
124     }
125     for (i = 0; sort_sequence->specs[i]; i++)
126         ;
127     sort_sequence->num_specs = i;
128     rset->hits_limit = sset->approx_limit;
129     if (!i)
130     {
131         res = resultSetRank(zh, sset, rset, rset_nmem);
132     }
133     else
134     {
135         res = resultSetSortSingle(zh, nmem, sset, rset,
136                                   sort_sequence, &sort_status);
137     }
138     sset->rset = rset;
139     return res;
140 }
141
142
143 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
144                           int num_bases, char **basenames,
145                           const char *setname,
146                           zint *hits, int *estimated_hit_count)
147 {
148     ZebraSet zebraSet;
149     int i;
150     ZEBRA_RES res;
151
152     *hits = 0;
153     *estimated_hit_count = 0;
154
155     zebraSet = resultSetAdd(zh, setname, 1);
156     if (!zebraSet)
157         return ZEBRA_FAIL;
158     zebraSet->locked = 1;
159     zebraSet->rpn = 0;
160     zebraSet->nmem = m;
161     zebraSet->rset_nmem = nmem_create(); 
162
163     zebraSet->num_bases = num_bases;
164     zebraSet->basenames = 
165         nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
166     for (i = 0; i<num_bases; i++)
167         zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
168
169     res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
170                           rpn, zebraSet);
171     *hits = zebraSet->hits;
172     if (zebraSet->estimated_hit_count)
173         *estimated_hit_count = 1;
174
175     if (zebraSet->rset)
176         zebraSet->rpn = rpn;
177     zebraSet->locked = 0;
178     if (!zebraSet->rset)
179         return ZEBRA_FAIL;
180     return res;
181 }
182
183 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
184                       const char *db, const char *index_name, 
185                       const char *term)
186 {
187     assert(zh); /* compiler shut up */
188     if (!s->nmem)
189         s->nmem = nmem_create();
190     if (!s->term_entries)
191     {
192         int i;
193         s->term_entries_max = 1000;
194         s->term_entries =
195             nmem_malloc(s->nmem, s->term_entries_max * 
196                         sizeof(*s->term_entries));
197         for (i = 0; i < s->term_entries_max; i++)
198             s->term_entries[i].term = 0;
199     }
200     if (s->hits < s->term_entries_max)
201     {
202         s->term_entries[s->hits].reg_type = reg_type;
203         s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
204         s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
205         s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
206     }
207     (s->hits)++;
208 }
209
210 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
211 {
212     ZebraSet s;
213     int i;
214
215     for (s = zh->sets; s; s = s->next)
216         if (!strcmp(s->name, name))
217             break;
218     
219     if (!log_level_set)
220         loglevels();
221     if (s)
222     {
223         yaz_log(log_level_resultsets, "updating result set %s", name);
224         if (!ov || s->locked)
225             return NULL;
226         if (s->rset)
227         {
228             if (s->cache_rfd)
229                 rset_close(s->cache_rfd);
230             rset_delete(s->rset);
231         }
232         if (s->rset_nmem)
233             nmem_destroy(s->rset_nmem);
234         if (s->nmem)
235             nmem_destroy(s->nmem);
236     }
237     else
238     {
239         const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
240
241         yaz_log(log_level_resultsets, "adding result set %s", name);
242         s = (ZebraSet) xmalloc(sizeof(*s));
243         s->next = zh->sets;
244         zh->sets = s;
245         s->name = xstrdup(name);
246
247         s->sort_info = (struct zset_sort_info *)
248             xmalloc(sizeof(*s->sort_info));
249         s->sort_info->max_entries = atoi(sort_max_str);
250         if (s->sort_info->max_entries < 2)
251             s->sort_info->max_entries = 2;
252
253         s->sort_info->entries = (struct zset_sort_entry **)
254             xmalloc(sizeof(*s->sort_info->entries) *
255                     s->sort_info->max_entries);
256         s->sort_info->all_entries = (struct zset_sort_entry *)
257             xmalloc(sizeof(*s->sort_info->all_entries) *
258                     s->sort_info->max_entries);
259         for (i = 0; i < s->sort_info->max_entries; i++)
260             s->sort_info->entries[i] = s->sort_info->all_entries + i;
261     }
262     s->locked = 0;
263     s->term_entries = 0;
264     s->hits = 0;
265     s->rset = 0;
266     s->rset_nmem = 0;
267     s->nmem = 0;
268     s->rpn = 0;
269     s->sortSpec = 0;
270     s->cache_position = 0;
271     s->cache_rfd = 0;
272     s->approx_limit = zh->approx_limit;
273     s->estimated_hit_count = 0;
274     return s;
275 }
276
277 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
278 {
279     ZebraSet s;
280
281     for (s = zh->sets; s; s = s->next)
282         if (!strcmp(s->name, name))
283         {
284             if (!s->term_entries && !s->rset && s->rpn)
285             {
286                 NMEM nmem = nmem_create();
287                 yaz_log(log_level_resultsets, "research %s", name);
288                 if (!s->rset_nmem)
289                     s->rset_nmem = nmem_create();
290                 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
291                 if (s->rset && s->sortSpec)
292                 {
293                     int sort_status;
294                     yaz_log(log_level_resultsets, "resort %s", name);
295                     resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
296                                         &sort_status);
297                 }
298                 nmem_destroy(nmem);
299             }
300             return s;
301         }
302     return NULL;
303 }
304
305 ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname,
306                                 const char ***basenames, int *num_bases)
307 {
308     ZebraSet sset = resultSetGet(zh, setname);
309     if (!sset)
310         return ZEBRA_FAIL;
311     *basenames = sset->basenames;
312     *num_bases = sset->num_bases;
313     return ZEBRA_OK;
314
315 }
316
317 void resultSetInvalidate(ZebraHandle zh)
318 {
319     ZebraSet s = zh->sets;
320     
321     yaz_log(log_level_resultsets, "invalidating result sets");
322     for (; s; s = s->next)
323     {
324         if (s->rset)
325         {
326             if (s->cache_rfd)
327                 rset_close(s->cache_rfd);
328             rset_delete(s->rset);
329         }
330         s->rset = 0;
331         s->cache_rfd = 0;
332         s->cache_position = 0;
333         if (s->rset_nmem)
334             nmem_destroy(s->rset_nmem);
335         s->rset_nmem=0;
336     }
337 }
338
339 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
340 {
341     ZebraSet * ss = &zh->sets;
342     int i;
343     
344     if (statuses)
345         for (i = 0; i<num; i++)
346             statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
347     while (*ss)
348     {
349         int i = -1;
350         ZebraSet s = *ss;
351         if (num >= 0)
352         {
353             for (i = 0; i<num; i++)
354                 if (!strcmp(s->name, names[i]))
355                 {
356                     if (statuses)
357                         statuses[i] = Z_DeleteStatus_success;
358                     i = -1;
359                     break;
360                 }
361         }
362         if (i < 0)
363         {
364             *ss = s->next;
365             
366             xfree(s->sort_info->all_entries);
367             xfree(s->sort_info->entries);
368             xfree(s->sort_info);
369             
370             if (s->nmem)
371                 nmem_destroy(s->nmem);
372             if (s->rset)
373             {
374                 if (s->cache_rfd)
375                     rset_close(s->cache_rfd);
376                 rset_delete(s->rset);
377             }
378             if (s->rset_nmem)
379                 nmem_destroy(s->rset_nmem);
380             xfree(s->name);
381             xfree(s);
382         }
383         else
384             ss = &s->next;
385     }
386 }
387
388 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
389                                                  const char *name, 
390                                                  zint start, int num)
391 {
392     zint pos_small[10];
393     zint *pos = pos_small;
394     ZebraMetaRecord *mr;
395     int i;
396
397     if (num > 10000 || num <= 0)
398         return 0;
399
400     if (num > 10)
401         pos = xmalloc(sizeof(*pos) * num);
402     
403     for (i = 0; i<num; i++)
404         pos[i] = start+i;
405
406     mr = zebra_meta_records_create(zh, name, num, pos);
407     
408     if (num > 10)
409         xfree(pos);
410     return mr;
411 }
412
413 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name, 
414                                            int num, zint *positions)
415 {
416     ZebraSet sset;
417     ZebraMetaRecord *sr = 0;
418     RSET rset;
419     int i;
420     struct zset_sort_info *sort_info;
421     size_t sysno_mem_index = 0;
422
423     if (zh->m_staticrank)
424         sysno_mem_index = 1;
425
426     if (!log_level_set)
427         loglevels();
428     if (!(sset = resultSetGet(zh, name)))
429         return NULL;
430     if (!(rset = sset->rset))
431     {
432         if (!sset->term_entries)
433             return 0;
434         sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
435         for (i = 0; i<num; i++)
436         {
437             sr[i].sysno = 0;
438             sr[i].score = -1;
439             sr[i].term = 0;
440             sr[i].db = 0;
441
442             if (positions[i] <= sset->term_entries_max)
443             {
444                 sr[i].term = sset->term_entries[positions[i]-1].term;
445                 sr[i].db = sset->term_entries[positions[i]-1].db;
446             }
447         }
448     }
449     else
450     {
451         sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
452         for (i = 0; i<num; i++)
453         {
454             sr[i].sysno = 0;
455             sr[i].score = -1;
456             sr[i].term = 0;
457             sr[i].db = 0;
458         }
459         sort_info = sset->sort_info;
460         if (sort_info)
461         {
462             zint position;
463             
464             for (i = 0; i<num; i++)
465             {
466                 position = positions[i];
467                 if (position > 0 && position <= sort_info->num_entries)
468                 {
469                     yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
470                             " (sorted)", position);
471                     sr[i].sysno = sort_info->entries[position-1]->sysno;
472                     sr[i].score = sort_info->entries[position-1]->score;
473                 }
474             }
475         }
476         /* did we really get all entries using sort ? */
477         for (i = 0; i<num; i++)
478         {
479             if (!sr[i].sysno)
480                 break;
481         }
482         if (i < num) /* nope, get the rest, unsorted - sorry */
483         {
484             zint position = 0;
485             int num_i = 0;
486             zint psysno = 0;
487             RSFD rfd;
488             struct it_key key;
489             
490             if (sort_info)
491                 position = sort_info->num_entries;
492             while (num_i < num && positions[num_i] <= position)
493                 num_i++;
494             
495             if (sset->cache_rfd &&
496                 num_i < num && positions[num_i] > sset->cache_position)
497             {
498                 position = sset->cache_position;
499                 rfd = sset->cache_rfd;
500                 psysno = sset->cache_psysno;
501             }
502             else
503             {
504                 if (sset->cache_rfd)
505                     rset_close(sset->cache_rfd);
506                 rfd = rset_open(rset, RSETF_READ);
507             }
508             while (num_i < num && rset_read(rfd, &key, 0))
509             {
510                 zint this_sys = key.mem[sysno_mem_index];
511                 if (this_sys != psysno)
512                 {
513                     psysno = this_sys;
514                     if (sort_info)
515                     {
516                         /* determine we alreay have this in our set */
517                         for (i = sort_info->num_entries; --i >= 0; )
518                             if (psysno == sort_info->entries[i]->sysno)
519                                 break;
520                         if (i >= 0)
521                             continue;
522                     }
523                     position++;
524                     assert(num_i < num);
525                     if (position == positions[num_i])
526                     {
527                         sr[num_i].sysno = psysno;
528                         yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
529                         sr[num_i].score = -1;
530                         num_i++;
531                     }
532                 }
533             }
534             sset->cache_position = position;
535             sset->cache_psysno = psysno;
536             sset->cache_rfd = rfd;
537         }
538     }
539     return sr;
540 }
541
542 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
543                                 int num)
544 {
545     assert(zh); /* compiler shut up about unused arg */
546     xfree(records);
547 }
548
/* One sort criterion, resolved for every database being searched. */
struct sortKeyInfo {
    int relation;           /* 'A' for ascending, 'D' for descending */
    int *ord;               /* per database: ord of the sort index, or -1
                               when the database has none */
    int *numerical;         /* per database: non-zero when keys are to be
                               compared as numbers rather than bytes */
    const char *index_type; /* index type used to untranslate keys before
                               a numerical comparison */
};
555
556 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
557                          int database_no,
558                          struct sortKeyInfo *criteria, int num_criteria,
559                          zint sysno,
560                          char *cmp_buf[], char *tmp_cmp_buf[])
561 {
562     struct zset_sort_entry *new_entry = NULL;
563     struct zset_sort_info *sort_info = sset->sort_info;
564     int i, j;
565     WRBUF w = wrbuf_alloc();
566
567     zebra_sort_sysno(zh->reg->sort_index, sysno);
568     for (i = 0; i<num_criteria; i++)
569     {
570         char *this_entry_buf = tmp_cmp_buf[i];
571         memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
572         
573         if (criteria[i].ord[database_no] != -1)
574         {
575             yaz_log(log_level_sort, "pre zebra_sort_type ord is %d",
576                     criteria[i].ord[database_no]);
577             zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]);
578             wrbuf_rewind(w);
579             if (zebra_sort_read(zh->reg->sort_index, 0, w))
580             {
581                 /* consider each sort entry and take lowest/highest one
582                    of the one as sorting key depending on whether sort is
583                    ascending/descending */
584                 int off = 0;
585                 while (off != wrbuf_len(w))
586                 {
587                     size_t l = strlen(wrbuf_buf(w)+off);
588                     assert(off < wrbuf_len(w));
589
590                     if (l >= SORT_IDX_ENTRYSIZE)
591                         l = SORT_IDX_ENTRYSIZE-1;
592                     if ( (off == 0)
593                          || (criteria[i].relation == 'A'
594                              && strcmp(wrbuf_buf(w)+off, this_entry_buf) < 0)
595                          || (criteria[i].relation == 'D'
596                              && strcmp(wrbuf_buf(w)+off, this_entry_buf) > 0)
597                         )
598                     {
599                         memcpy(this_entry_buf, wrbuf_buf(w)+off, l);
600                         this_entry_buf[l] = '\0';
601                     }
602                     off += 1 + strlen(wrbuf_buf(w)+off);
603                 }
604             }
605         }
606         else
607         {
608             yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
609         }
610     }
611     wrbuf_destroy(w);
612     i = sort_info->num_entries;
613     while (--i >= 0)
614     {
615         int rel = 0;
616         for (j = 0; j<num_criteria; j++)
617         {
618             char *this_entry_buf = tmp_cmp_buf[j];
619             char *other_entry_buf = 
620                 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
621             if (criteria[j].numerical[database_no])
622             {
623                 char this_entry_org[1024];
624                 char other_entry_org[1024];
625                 double diff;
626                 const char *index_type = criteria[j].index_type;
627                 zebra_term_untrans(zh, index_type, this_entry_org,
628                                    this_entry_buf);
629                 zebra_term_untrans(zh, index_type, other_entry_org,
630                                    other_entry_buf);
631                 diff = atof(this_entry_org) - atof(other_entry_org);
632                 
633                 if (diff > 0.0)
634                     rel = 1;
635                 else if (diff < 0.0)
636                     rel = -1;
637                 else
638                     rel = 0;
639             }
640             else
641             {
642                 rel = memcmp(this_entry_buf, other_entry_buf,
643                              SORT_IDX_ENTRYSIZE);
644             }
645             /* when the compare is equal, continue to next criteria, 
646                else break out */
647             if (rel)
648                 break;
649         }       
650         if (!rel)
651             break;
652         if (criteria[j].relation == 'A')
653         {
654             if (rel > 0)
655                 break;
656         }
657         else if (criteria[j].relation == 'D')
658         {
659             if (rel < 0)
660                 break;
661         }
662     }
663     ++i;
664     yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
665     j = sort_info->max_entries;
666     if (i == j){
667         yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
668         return;
669     }
670
671     if (sort_info->num_entries == j)
672         --j;
673     else
674         j = (sort_info->num_entries)++;
675     new_entry = sort_info->entries[j];
676     /* move up all higher entries (to make room) */
677     while (j != i)
678     {
679         int k;
680         for (k = 0; k<num_criteria; k++)
681         {
682             char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
683             char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
684             memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
685         }
686         sort_info->entries[j] = sort_info->entries[j-1];
687         --j;
688     }
689     /* and insert the new entry at the correct place */
690     sort_info->entries[i] = new_entry;
691     assert(new_entry);
692     /* and add this to the compare buffer */
693     for (i = 0; i<num_criteria; i++)
694     {
695         char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
696         char *this_entry_buf = tmp_cmp_buf[i];
697         memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
698     }
699     new_entry->sysno = sysno;
700     new_entry->score = -1;
701 }
702
703 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
704                          zint sysno, int score, int relation)
705 {
706     struct zset_sort_entry *new_entry = NULL;
707     int i, j;
708     assert(zh); /* compiler shut up about unused arg */
709
710     i = sort_info->num_entries;
711     while (--i >= 0)
712     {
713         int rel = 0;
714
715         rel = score - sort_info->entries[i]->score;
716
717         if (relation == 'D')
718         {
719             if (rel >= 0)
720                 break;
721         }
722         else if (relation == 'A')
723         {
724             if (rel <= 0)
725                 break;
726         }
727     }
728     ++i;
729     j = sort_info->max_entries;
730     if (i == j)
731         return;
732
733     if (sort_info->num_entries == j)
734         --j;
735     else
736         j = (sort_info->num_entries)++;
737     
738     new_entry = sort_info->entries[j];
739     while (j != i)
740     {
741         sort_info->entries[j] = sort_info->entries[j-1];
742         --j;
743     }
744     sort_info->entries[i] = new_entry;
745     assert(new_entry);
746     new_entry->sysno = sysno;
747     new_entry->score = score;
748 }
749
750 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
751 {
752     Z_RPNQuery *dst = 0;
753     ODR encode = odr_createmem(ODR_ENCODE);
754     ODR decode = odr_createmem(ODR_DECODE);
755
756     if (z_RPNQuery(encode, &src, 0, 0))
757     {
758         int len;
759         char *buf = odr_getbuf(encode, &len, 0);
760
761         if (buf)
762         {
763             odr_setbuf(decode, buf, len, 0);
764             z_RPNQuery(decode, &dst, 0, 0);
765         }
766     }
767     nmem_transfer(nmem, decode->mem);
768     odr_destroy(encode);
769     odr_destroy(decode);
770     return dst;
771 }
772
773 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
774 {
775     Z_SortKeySpecList *dst = 0;
776     ODR encode = odr_createmem(ODR_ENCODE);
777     ODR decode = odr_createmem(ODR_DECODE);
778
779     if (z_SortKeySpecList(encode, &src, 0, 0))
780     {
781         int len;
782         char *buf = odr_getbuf(encode, &len, 0);
783
784         if (buf)
785         {
786             odr_setbuf(decode, buf, len, 0);
787             z_SortKeySpecList(decode, &dst, 0, 0);
788         }
789     }
790     nmem_transfer(nmem, decode->mem);
791     odr_destroy(encode);
792     odr_destroy(decode);
793     return dst;
794 }
795
796 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
797                         ZebraSet rset)
798 {
799     ZebraSet nset;
800     int i;
801
802     nset = resultSetAdd(zh, setname, 1);
803     if (!nset)
804         return 0;
805
806     nset->nmem = nmem_create();
807
808     nset->num_bases = rset->num_bases;
809     nset->basenames = 
810         nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
811     for (i = 0; i<rset->num_bases; i++)
812         nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
813
814     if (rset->rset)
815         nset->rset = rset_dup(rset->rset);
816     if (rset->rpn)
817         nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
818     return nset;
819 }
820
821 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
822                         int num_input_setnames, const char **input_setnames,
823                         const char *output_setname,
824                         Z_SortKeySpecList *sort_sequence, int *sort_status)
825 {
826     ZebraSet sset;
827     RSET rset;
828
829     if (num_input_setnames == 0)
830     {
831         zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
832         return ZEBRA_FAIL;
833     }
834     if (num_input_setnames > 1)
835     {
836         zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
837         return ZEBRA_FAIL;
838     }
839     if (!log_level_set)
840         loglevels();
841     yaz_log(log_level_sort, "result set sort input=%s output=%s",
842             *input_setnames, output_setname);
843     sset = resultSetGet(zh, input_setnames[0]);
844     if (!sset)
845     {
846         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
847                        input_setnames[0]);
848         return ZEBRA_FAIL;
849     }
850     if (!(rset = sset->rset))
851     {
852         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
853                        input_setnames[0]);
854         return ZEBRA_FAIL;
855     }
856     if (strcmp(output_setname, input_setnames[0]))
857         sset = resultSetClone(zh, output_setname, sset);
858     sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
859     return resultSetSortSingle(zh, nmem, sset, rset, sort_sequence,
860                                sort_status);
861 }
862
863 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
864                               ZebraSet sset, RSET rset,
865                               Z_SortKeySpecList *sort_sequence,
866                               int *sort_status)
867 {
868     int i;
869     int ib;
870     int n = 0;
871     zint kno = 0;
872     zint psysno = 0;
873     struct it_key key;
874     struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
875     char *cmp_buf[ZSET_SORT_MAX_LEVEL];
876     char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
877     int num_criteria;
878     RSFD rfd;
879     TERMID termid;
880     TERMID *terms;
881     int numTerms = 0;
882     size_t sysno_mem_index = 0;
883     
884     int numbases = zh->num_basenames;
885     yaz_log(log_level_sort, "searching %d databases",numbases);
886
887     if (zh->m_staticrank)
888         sysno_mem_index = 1;
889
890     assert(nmem); /* compiler shut up about unused param */
891     sset->sort_info->num_entries = 0;
892
893     rset_getterms(rset, 0, 0, &n);
894     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
895     rset_getterms(rset, terms, n, &numTerms);
896
897     sset->hits = 0;
898     num_criteria = sort_sequence->num_specs;
899     if (num_criteria > ZSET_SORT_MAX_LEVEL)
900         num_criteria = ZSET_SORT_MAX_LEVEL;
901     /* set up the search criteria */
902     for (i = 0; i < num_criteria; i++)
903     {
904         Z_SortKeySpec *sks = sort_sequence->specs[i];
905         Z_SortKey *sk;
906         ZEBRA_RES res;
907         
908         sort_criteria[i].ord = (int *)
909             nmem_malloc(nmem, sizeof(int)*numbases);
910         sort_criteria[i].numerical = (int *)
911             nmem_malloc(nmem, sizeof(int)*numbases);
912         
913         /* initialize ord and numerical for each database */
914         for (ib = 0; ib < numbases; ib++)
915         {
916             sort_criteria[i].ord[ib] = -1;
917             sort_criteria[i].numerical[ib] = 0;
918         }
919
920         if (sks->which == Z_SortKeySpec_missingValueData)
921         {
922             zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
923             return ZEBRA_FAIL;
924         }
925         if (*sks->sortRelation == Z_SortKeySpec_ascending)
926             sort_criteria[i].relation = 'A';
927         else if (*sks->sortRelation == Z_SortKeySpec_descending)
928             sort_criteria[i].relation = 'D';
929         else
930         {
931             zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
932             return ZEBRA_FAIL;
933         }
934         if (sks->sortElement->which == Z_SortElement_databaseSpecific)
935         {
936             zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
937             return ZEBRA_FAIL;
938         }
939         else if (sks->sortElement->which != Z_SortElement_generic)
940         {
941             zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
942             return ZEBRA_FAIL;
943         }       
944         sk = sks->sortElement->u.generic;
945         switch (sk->which)
946         {
947         case Z_SortKey_sortField:
948             yaz_log(log_level_sort, "key %d is of type sortField", i+1);
949             for (ib = 0; ib < numbases; ib++)
950             {
951                 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
952                 sort_criteria[i].numerical[ib] = 0;
953                 sort_criteria[i].ord[ib] = 
954                     zebraExplain_lookup_attr_str(zh->reg->zei,
955                                                  zinfo_index_category_sort,
956                                                  0, sk->u.sortField);
957                 if (sks->which != Z_SortKeySpec_null
958                     && sort_criteria[i].ord[ib] == -1)
959                 {
960                     zebra_setError(zh,
961                                    YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
962                     return ZEBRA_FAIL;
963                 }
964             }
965             break;
966         case Z_SortKey_elementSpec:
967             yaz_log(log_level_sort, "key %d is of type elementSpec", i+1);
968             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
969             return ZEBRA_FAIL;
970         case Z_SortKey_sortAttributes:
971             yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
972             /* for every database we searched, get the sort index file
973                id (ord) and its numerical indication and store them in
974                the sort_criteria */
975             for (ib = 0; ib < numbases; ib++)
976             {
977                 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
978                 res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
979                                          &sort_criteria[i].ord[ib],
980                                          &sort_criteria[i].numerical[ib]);
981             }
982             
983             if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
984                 return ZEBRA_FAIL;
985             break;
986         }
987         /* right now we look up the index type based on the first database
988            if the index_type's can differ between the indexes of different
989            databases (which i guess they can?) then we have to store the
990            index types for each database, just like the ord and numerical */
991         if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
992                                     &sort_criteria[i].index_type,
993                                     0, 0))
994         {
995             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
996             return ZEBRA_FAIL;
997         }
998     }
999     /* allocate space for each cmpare buf + one extra for tmp comparison */
1000     /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
1001        all other result entries to compare against. This is slowly filled when records are processed.
1002        tmp_cmp_buf is an array with a value of the current record for each criteria
1003     */
1004     for (i = 0; i<num_criteria; i++)
1005     {
1006         cmp_buf[i] = xmalloc(sset->sort_info->max_entries
1007                              * SORT_IDX_ENTRYSIZE);
1008         tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
1009     }
1010     rfd = rset_open(rset, RSETF_READ);
1011     while (rset_read(rfd, &key, &termid))
1012     {
1013         zint this_sys = key.mem[sysno_mem_index];
1014         if (log_level_searchhits)
1015             key_logdump_txt(log_level_searchhits, &key, termid->name);
1016         kno++;
1017         if (this_sys != psysno)
1018         {
1019             int database_no = 0;
1020             if ((sset->hits & 255) == 0 && zh->break_handler_func)
1021             {
1022                 if (zh->break_handler_func(zh->break_handler_data))
1023                 {
1024                     rset_set_hits_limit(rset, 0);
1025                     break;
1026                 }
1027             }
1028             (sset->hits)++;
1029             psysno = this_sys;
1030
1031             /* determine database from the term, but only bother if more than
1032                one database is in use*/
1033             if (numbases > 1 && termid->ol)
1034             {
1035                 const char *this_db = 0;
1036                 if (zebraExplain_lookup_ord(zh->reg->zei, termid->ol->ord,  0, &this_db, 0)
1037                     == 0 && this_db)
1038                 {
1039                     for (ib = 0; ib < numbases; ib++)
1040                         if (!strcmp(this_db, zh->basenames[ib]))
1041                             database_no = ib;
1042                 }
1043             }
1044 #if 0
1045             yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " database_no=%d", this_sys,
1046                 database_no);
1047             ord_list_print(termid->ol);
1048 #endif
1049             resultSetInsertSort(zh, sset, database_no,
1050                                 sort_criteria, num_criteria, psysno, cmp_buf,
1051                                 tmp_cmp_buf);
1052         }
1053     }
1054     rset_close(rfd);
1055
1056     /* free the compare buffers */
1057     for (i = 0; i<num_criteria; i++)
1058     {
1059         xfree(cmp_buf[i]);
1060         xfree(tmp_cmp_buf[i]);
1061     }
1062
1063     yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
1064             kno, sset->hits);   
1065     for (i = 0; i < numTerms; i++)
1066         yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1067                 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1068     *sort_status = Z_SortResponse_success;
1069     return ZEBRA_OK;
1070 }
1071
1072 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1073 {
1074     ZebraSet s;
1075
1076     if ((s = resultSetGet(zh, resultSetId)))
1077         return s->rset;
1078     return NULL;
1079 }
1080
1081 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
1082                         RSET rset, NMEM nmem)
1083 {
1084     struct it_key key;
1085     TERMID termid;
1086     TERMID *terms;
1087     zint kno = 0;
1088     int numTerms = 0;
1089     int n = 0;
1090     int i;
1091     ZebraRankClass rank_class;
1092     struct zset_sort_info *sort_info;
1093     const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
1094     size_t sysno_mem_index = 0;
1095
1096     if (zh->m_staticrank)
1097         sysno_mem_index = 1;
1098
1099     if (!log_level_set)
1100         loglevels();
1101     sort_info = zebraSet->sort_info;
1102     sort_info->num_entries = 0;
1103     zebraSet->hits = 0;
1104     zebraSet->estimated_hit_count = 0;
1105     rset_getterms(rset, 0, 0, &n);
1106     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1107     rset_getterms(rset, terms, n, &numTerms);
1108
1109     rank_class = zebraRankLookup(zh, rank_handler_name);
1110     if (!rank_class)
1111     {
1112         yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1113         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1114         return ZEBRA_FAIL;
1115     }
1116     else
1117     {
1118         RSFD rfd = rset_open(rset, RSETF_READ);
1119         struct rank_control *rc = rank_class->control;
1120         int score;
1121         zint count = 0;
1122         void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1123                                      nmem, terms, numTerms);
1124         zint psysno = 0;  /* previous doc id / sys no */
1125         zint pstaticrank = 0; /* previous static rank */
1126         int stop_flag = 0;
1127         while (rset_read(rfd, &key, &termid))
1128         {
1129             zint this_sys = key.mem[sysno_mem_index];
1130
1131             zint seqno = key.mem[key.len-1];
1132             kno++;
1133             if (log_level_searchhits)
1134                 key_logdump_txt(log_level_searchhits, &key, termid->name);
1135             if (this_sys != psysno) 
1136             {   /* new record .. */
1137                 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1138                 {
1139                     if (zh->break_handler_func(zh->break_handler_data))
1140                     {
1141                         yaz_log(YLOG_LOG, "Aborted search");
1142                         stop_flag = 1;
1143                     }
1144                 }
1145                 if (rfd->counted_items > rset->hits_limit)
1146                     stop_flag = 1;
1147                 if (psysno)
1148                 {   /* only if we did have a previous record */
1149                     score = (*rc->calc)(handle, psysno, pstaticrank,
1150                                         &stop_flag);
1151                     /* insert the hit. A=Ascending */
1152                     resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1153                     count++;
1154                 }
1155                 if (stop_flag)
1156                 {
1157                     zebraSet->estimated_hit_count = 1;
1158                     rset_set_hits_limit(rset, 0);
1159                     break;
1160                 }
1161                 psysno = this_sys;
1162                 if (zh->m_staticrank)
1163                     pstaticrank = key.mem[0];
1164             }
1165             (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1166         }
1167         /* no more items */
1168         if (psysno)
1169         {   /* we had - at least - one record */
1170             score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1171             /* insert the hit. A=Ascending */
1172             resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1173             count++;
1174         }
1175         (*rc->end)(zh->reg, handle);
1176         rset_close(rfd);
1177     }
1178     zebraSet->hits = rset->hits_count;
1179
1180     yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1181             ZINT_FORMAT " sysnos, rank",  kno, zebraSet->hits);
1182     for (i = 0; i < numTerms; i++)
1183     {
1184         yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1185                 ZINT_FORMAT,
1186                 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1187     }
1188     return ZEBRA_OK;
1189 }
1190
1191 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1192 {
1193     ZebraRankClass p = zh->reg->rank_classes;
1194     while (p && strcmp(p->control->name, name))
1195         p = p->next;
1196     if (p && !p->init_flag)
1197     {
1198         if (p->control->create)
1199             p->class_handle = (*p->control->create)(zh);
1200         p->init_flag = 1;
1201     }
1202     return p;
1203 }
1204
1205 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1206 {
1207     ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1208     p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1209     memcpy(p->control, ctrl, sizeof(*p->control));
1210     p->control->name = xstrdup(ctrl->name);
1211     p->init_flag = 0;
1212     p->next = reg->rank_classes;
1213     reg->rank_classes = p;
1214 }
1215
1216 void zebraRankDestroy(struct zebra_register *reg)
1217 {
1218     ZebraRankClass p = reg->rank_classes;
1219     while (p)
1220     {
1221         ZebraRankClass p_next = p->next;
1222         if (p->init_flag && p->control->destroy)
1223             (*p->control->destroy)(reg, p->class_handle);
1224         xfree(p->control->name);
1225         xfree(p->control);
1226         xfree(p);
1227         p = p_next;
1228     }
1229     reg->rank_classes = NULL;
1230 }
1231
1232 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1233                                  zint *hits_array, int *approx_array)
1234 {
1235     int no = 0;
1236     int i;
1237     for (i = 0; i<rset->no_children; i++)
1238         no += trav_rset_for_termids(rset->children[i],
1239                                     (termid_array ? termid_array + no : 0),
1240                                     (hits_array ? hits_array + no : 0),
1241                                     (approx_array ? approx_array + no : 0));
1242     if (rset->term)
1243     {
1244         if (termid_array)
1245             termid_array[no] = rset->term;
1246         if (hits_array)
1247             hits_array[no] = rset->hits_count;
1248         if (approx_array)
1249             approx_array[no] = rset->hits_approx;
1250 #if 0
1251         yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1252                 " count=" ZINT_FORMAT,
1253                 rset, rset->term->name, rset->hits_limit, rset->hits_count);
1254 #endif
1255         no++;
1256     }
1257     return no;
1258 }
1259
1260 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1261                                    int *num_terms)
1262 {
1263     ZebraSet sset = resultSetGet(zh, setname);
1264     *num_terms = 0;
1265     if (sset)
1266     {
1267         *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1268         return ZEBRA_OK;
1269     }
1270     return ZEBRA_FAIL;
1271 }
1272
1273 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1274                                      int no, zint *count, int *approx,
1275                                      char *termbuf, size_t *termlen,
1276                                      const char **term_ref_id)
1277 {
1278     ZebraSet sset = resultSetGet(zh, setname);
1279     if (sset)
1280     {
1281         int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1282         if (no >= 0 && no < num_terms)
1283         {
1284             TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1285             zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1286             int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1287             
1288             trav_rset_for_termids(sset->rset, term_array,
1289                                   hits_array, approx_array);
1290
1291             if (count)
1292                 *count = hits_array[no];
1293             if (approx)
1294                 *approx = approx_array[no];
1295             if (termbuf)
1296             {
1297                 char *inbuf = term_array[no]->name;
1298                 size_t inleft = strlen(inbuf);
1299                 size_t outleft = *termlen - 1;
1300
1301                 if (zh->iconv_from_utf8 != 0)
1302                 {
1303                     char *outbuf = termbuf;
1304                     size_t ret;
1305                     
1306                     ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1307                                     &outbuf, &outleft);
1308                     if (ret == (size_t)(-1))
1309                         *termlen = 0;
1310                     else
1311                     {
1312                         yaz_iconv(zh->iconv_from_utf8, 0, 0, 
1313                                   &outbuf, &outleft);
1314                         *termlen = outbuf - termbuf;
1315                     }
1316                 }
1317                 else
1318                 {
1319                     if (inleft > outleft)
1320                         inleft = outleft;
1321                     *termlen = inleft;
1322                     memcpy(termbuf, inbuf, *termlen);
1323                 }
1324                 termbuf[*termlen] = '\0';
1325             }
1326             if (term_ref_id)
1327                 *term_ref_id = term_array[no]->ref_id;
1328
1329             xfree(term_array);
1330             xfree(hits_array);
1331             xfree(approx_array);
1332             return ZEBRA_OK;
1333         }
1334     }
1335     return ZEBRA_FAIL;
1336 }
1337
1338 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1339                                     zint sysno, zebra_snippets *snippets)
1340 {
1341     ZebraSet sset = resultSetGet(zh, setname);
1342     yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1343             setname, sysno);
1344     if (!sset)
1345         return ZEBRA_FAIL;
1346     else
1347     {
1348         struct rset_key_control *kc = zebra_key_control_create(zh);
1349         NMEM nmem = nmem_create();
1350         struct it_key key;
1351         RSET rsets[2], rset_comb;
1352         RSET rset_temp = rset_create_temp(nmem, kc, kc->scope, 
1353                                           res_get(zh->res, "setTmpDir"),0 );
1354         
1355         TERMID termid;
1356         RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1357         
1358         key.mem[0] = sysno;
1359         key.mem[1] = 0;
1360         key.mem[2] = 0;
1361         key.mem[3] = 0;
1362         key.len = 2;
1363         rset_write(rsfd, &key);
1364         rset_close(rsfd);
1365
1366         rsets[0] = rset_temp;
1367         rsets[1] = rset_dup(sset->rset);
1368         
1369         rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1370
1371         rsfd = rset_open(rset_comb, RSETF_READ);
1372
1373         while (rset_read(rsfd, &key, &termid))
1374         {
1375             if (termid)
1376             {
1377                 struct ord_list *ol;
1378                 for (ol = termid->ol; ol; ol = ol->next)
1379                 {
1380                     zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1381                                           ol->ord, termid->name);
1382                 }
1383             }
1384         }
1385         rset_close(rsfd);
1386         
1387         rset_delete(rset_comb);
1388         nmem_destroy(nmem);
1389         kc->dec(kc);
1390     }
1391     return ZEBRA_OK;
1392 }
1393
1394 static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh, 
1395                                       const char **basenames, int num_bases,
1396                                       zint recid,
1397                                       zint *sysnos, int *no_sysnos)
1398 {
1399     ZEBRA_RES res = ZEBRA_OK;
1400     int sysnos_offset = 0;
1401     int i;
1402     
1403     if (!zh->reg->isamb || !zh->m_segment_indexing)
1404     {
1405         if (sysnos_offset < *no_sysnos)
1406             *sysnos = recid;
1407         sysnos_offset++;
1408     }
1409     else
1410     {
1411         for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1412         {
1413             const char *database = basenames[i];
1414             if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1415             {
1416                 const char *index_type = "w";
1417                 const char *use_string = "_ALLRECORDS";
1418                 int ord;
1419                 zinfo_index_category_t cat = zinfo_index_category_alwaysmatches;
1420                 ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
1421                                                    index_type, use_string);
1422                 if (ord != -1)
1423                 {
1424                     char ord_buf[32];
1425                     int ord_len = key_SU_encode(ord, ord_buf);
1426                     char *info;
1427                 
1428                     ord_buf[ord_len] = '\0';
1429                 
1430                     info = dict_lookup(zh->reg->dict, ord_buf);
1431                     if (info)
1432                     {
1433                         if (*info != sizeof(ISAM_P))
1434                         {
1435                             res = ZEBRA_FAIL;
1436                         }
1437                         else
1438                         {
1439                             ISAM_P isam_p;
1440                             ISAMB_PP pt;
1441                             struct it_key key_until, key_found;
1442                             int i = 0;
1443                             int r;
1444                         
1445                             memcpy(&isam_p, info+1, sizeof(ISAM_P));
1446                         
1447                             pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
1448                             if (!pt)
1449                                 res = ZEBRA_FAIL;
1450                             else
1451                             {
1452                                 key_until.mem[i++] = recid;
1453                                 key_until.mem[i++] = 0;  /* section_id */
1454                                 if (zh->m_segment_indexing)
1455                                     key_until.mem[i++] = 0; /* segment */
1456                                 key_until.mem[i++] = 0;
1457                                 key_until.len = i;
1458                             
1459                                 r = isamb_pp_forward(pt, &key_found, &key_until);
1460                                 while (r && key_found.mem[0] == recid)
1461                                 {
1462                                     if (sysnos_offset < *no_sysnos)
1463                                         sysnos[sysnos_offset++] = 
1464                                             key_found.mem[key_found.len-1];
1465                                     r = isamb_pp_read(pt, &key_found);
1466                                 }
1467                                 isamb_pp_close(pt);
1468                             }
1469                         }
1470                     }
1471                 }
1472             }
1473         }
1474     }
1475     *no_sysnos = sysnos_offset;
1476     return res;
1477 }
1478
1479 ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh, 
1480                                       const char *setname,
1481                                       zint recid,
1482                                       zint *sysnos, int *no_sysnos)
1483 {
1484     const char **basenames;
1485     int num_bases;
1486     ZEBRA_RES res;
1487
1488     res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1489     if (res != ZEBRA_OK)
1490         return ZEBRA_FAIL;
1491
1492     return zebra_recid_to_sysno(zh, basenames, num_bases,
1493                                 recid, sysnos, no_sysnos);
1494 }
1495
1496 void zebra_count_set(ZebraHandle zh, RSET rset, zint *count,
1497                      zint approx_limit)
1498 {
1499     zint psysno = 0;
1500     struct it_key key;
1501     RSFD rfd;
1502
1503     yaz_log(YLOG_DEBUG, "count_set");
1504
1505     rset->hits_limit = approx_limit;
1506
1507     *count = 0;
1508     rfd = rset_open(rset, RSETF_READ);
1509     while (rset_read(rfd, &key,0 /* never mind terms */))
1510     {
1511         if (key.mem[0] != psysno)
1512         {
1513             psysno = key.mem[0];
1514             if (rfd->counted_items >= rset->hits_limit)
1515                 break;
1516         }
1517     }
1518     rset_close(rfd);
1519     *count = rset->hits_count;
1520 }
1521                    
1522
1523 /*
1524  * Local variables:
1525  * c-basic-offset: 4
1526  * indent-tabs-mode: nil
1527  * End:
1528  * vim: shiftwidth=4 tabstop=8 expandtab
1529  */
1530