Reformat. Avoid C++ style comments.
[idzebra-moved-to-github.git] / index / zsets.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1995-2008 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20
21 #include <stdio.h>
22 #include <assert.h>
23 #ifdef WIN32
24 #include <io.h>
25 #else
26 #include <unistd.h>
27 #endif
28
29 #include "index.h"
30 #include "rank.h"
31 #include <yaz/diagbib1.h>
32 #include <rset.h>
33
34 #define ZSET_SORT_MAX_LEVEL 10
35
/* One term stored on a result set by resultSetAddTerm(). */
struct zebra_set_term_entry {
    int reg_type;       /* reg_type value handed to resultSetAddTerm() */
    char *db;           /* copy of the database name */
    char *index_name;   /* copy of the index name */
    char *term;         /* copy of the term; 0 while the slot is unused */
};
42
43 struct zebra_set {
44     char *name;
45     RSET rset;
46     NMEM nmem;
47     NMEM rset_nmem; /* for creating the rsets in */
48     zint hits;
49     int num_bases;
50     const char **basenames;
51     Z_RPNQuery *rpn;
52     Z_SortKeySpecList *sortSpec;
53     struct zset_sort_info *sort_info;
54     struct zebra_set_term_entry *term_entries;
55     int term_entries_max;
56     struct zebra_set *next;
57     int locked;
58     int estimated_hit_count;
59
60     zint cache_position;  /* last position */
61     RSFD cache_rfd;       /* rfd (NULL if not existing) */
62     zint cache_psysno;    /* sysno for last position */
63     zint approx_limit;    /* limit before we do approx */
64 };
65
66 struct zset_sort_entry {
67     zint sysno;
68     int score;
69 };
70
/* Bounded, ordered list of the best entries of a result set. */
struct zset_sort_info {
    int max_entries;   /* capacity of both arrays below */
    int num_entries;   /* number of entries currently in use */
    struct zset_sort_entry *all_entries;  /* backing storage for entries */
    struct zset_sort_entry **entries;     /* pointers into all_entries,
                                             kept in sort order */
};
77
/* YAZ log levels used in this file; looked up once by loglevels(). */
static int log_level_set = 0;          /* non-zero once the lookup is done */
static int log_level_sort = 0;         /* module "sorting" */
static int log_level_searchhits = 0;   /* module "searchhits" */
static int log_level_searchterms = 0;  /* module "searchterms" */
static int log_level_resultsets = 0;   /* module "resultsets" */
83
84 static void loglevels(void)
85 {
86     if (log_level_set)
87         return;
88     log_level_sort = yaz_log_module_level("sorting");
89     log_level_searchhits = yaz_log_module_level("searchhits");
90     log_level_searchterms = yaz_log_module_level("searchterms");
91     log_level_resultsets = yaz_log_module_level("resultsets");
92     log_level_set = 1;
93 }
94
95
96 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
97                                  Z_RPNQuery *rpn, ZebraSet sset)
98 {
99     RSET rset = 0;
100     Z_SortKeySpecList *sort_sequence;
101     int sort_status, i;
102     ZEBRA_RES res = ZEBRA_OK;
103
104     sort_sequence = (Z_SortKeySpecList *)
105         nmem_malloc(nmem, sizeof(*sort_sequence));
106     sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
107     sort_sequence->specs = (Z_SortKeySpec **)
108         nmem_malloc(nmem, sort_sequence->num_specs *
109                     sizeof(*sort_sequence->specs));
110     for (i = 0; i<sort_sequence->num_specs; i++)
111         sort_sequence->specs[i] = 0;
112     
113     rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
114
115     res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
116                          nmem, rset_nmem,
117                          sort_sequence,
118                          sset->num_bases, sset->basenames,
119                          &rset);
120     if (res != ZEBRA_OK)
121     {
122         sset->rset = 0;
123         return res;
124     }
125     for (i = 0; sort_sequence->specs[i]; i++)
126         ;
127     sort_sequence->num_specs = i;
128     rset->hits_limit = sset->approx_limit;
129     if (!i)
130     {
131         res = resultSetRank(zh, sset, rset, rset_nmem);
132     }
133     else
134     {
135         res = resultSetSortSingle(zh, nmem, sset, rset,
136                                   sort_sequence, &sort_status);
137     }
138     sset->rset = rset;
139     return res;
140 }
141
142
143 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
144                           int num_bases, char **basenames,
145                           const char *setname,
146                           zint *hits, int *estimated_hit_count)
147 {
148     ZebraSet zebraSet;
149     int i;
150     ZEBRA_RES res;
151
152     *hits = 0;
153     *estimated_hit_count = 0;
154
155     zebraSet = resultSetAdd(zh, setname, 1);
156     if (!zebraSet)
157         return ZEBRA_FAIL;
158     zebraSet->locked = 1;
159     zebraSet->rpn = 0;
160     zebraSet->nmem = m;
161     zebraSet->rset_nmem = nmem_create(); 
162
163     zebraSet->num_bases = num_bases;
164     zebraSet->basenames = 
165         nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
166     for (i = 0; i<num_bases; i++)
167         zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
168
169     res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
170                           rpn, zebraSet);
171     *hits = zebraSet->hits;
172     if (zebraSet->estimated_hit_count)
173         *estimated_hit_count = 1;
174
175     if (zebraSet->rset)
176         zebraSet->rpn = rpn;
177     zebraSet->locked = 0;
178     if (!zebraSet->rset)
179         return ZEBRA_FAIL;
180     return res;
181 }
182
183 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
184                       const char *db, const char *index_name, 
185                       const char *term)
186 {
187     assert(zh); /* compiler shut up */
188     if (!s->nmem)
189         s->nmem = nmem_create();
190     if (!s->term_entries)
191     {
192         int i;
193         s->term_entries_max = 1000;
194         s->term_entries =
195             nmem_malloc(s->nmem, s->term_entries_max * 
196                         sizeof(*s->term_entries));
197         for (i = 0; i < s->term_entries_max; i++)
198             s->term_entries[i].term = 0;
199     }
200     if (s->hits < s->term_entries_max)
201     {
202         s->term_entries[s->hits].reg_type = reg_type;
203         s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
204         s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
205         s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
206     }
207     (s->hits)++;
208 }
209
210 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
211 {
212     ZebraSet s;
213     int i;
214
215     for (s = zh->sets; s; s = s->next)
216         if (!strcmp(s->name, name))
217             break;
218     
219     if (!log_level_set)
220         loglevels();
221     if (s)
222     {
223         yaz_log(log_level_resultsets, "updating result set %s", name);
224         if (!ov || s->locked)
225             return NULL;
226         if (s->rset)
227         {
228             if (s->cache_rfd)
229                 rset_close(s->cache_rfd);
230             rset_delete(s->rset);
231         }
232         if (s->rset_nmem)
233             nmem_destroy(s->rset_nmem);
234         if (s->nmem)
235             nmem_destroy(s->nmem);
236     }
237     else
238     {
239         const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
240
241         yaz_log(log_level_resultsets, "adding result set %s", name);
242         s = (ZebraSet) xmalloc(sizeof(*s));
243         s->next = zh->sets;
244         zh->sets = s;
245         s->name = xstrdup(name);
246
247         s->sort_info = (struct zset_sort_info *)
248             xmalloc(sizeof(*s->sort_info));
249         s->sort_info->max_entries = atoi(sort_max_str);
250         if (s->sort_info->max_entries < 2)
251             s->sort_info->max_entries = 2;
252
253         s->sort_info->entries = (struct zset_sort_entry **)
254             xmalloc(sizeof(*s->sort_info->entries) *
255                     s->sort_info->max_entries);
256         s->sort_info->all_entries = (struct zset_sort_entry *)
257             xmalloc(sizeof(*s->sort_info->all_entries) *
258                     s->sort_info->max_entries);
259         for (i = 0; i < s->sort_info->max_entries; i++)
260             s->sort_info->entries[i] = s->sort_info->all_entries + i;
261     }
262     s->locked = 0;
263     s->term_entries = 0;
264     s->hits = 0;
265     s->rset = 0;
266     s->rset_nmem = 0;
267     s->nmem = 0;
268     s->rpn = 0;
269     s->sortSpec = 0;
270     s->cache_position = 0;
271     s->cache_rfd = 0;
272     s->approx_limit = zh->approx_limit;
273     s->estimated_hit_count = 0;
274     return s;
275 }
276
277 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
278 {
279     ZebraSet s;
280
281     for (s = zh->sets; s; s = s->next)
282         if (!strcmp(s->name, name))
283         {
284             if (!s->term_entries && !s->rset && s->rpn)
285             {
286                 NMEM nmem = nmem_create();
287                 yaz_log(log_level_resultsets, "research %s", name);
288                 if (!s->rset_nmem)
289                     s->rset_nmem = nmem_create();
290                 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
291                 if (s->rset && s->sortSpec)
292                 {
293                     int sort_status;
294                     yaz_log(log_level_resultsets, "resort %s", name);
295                     resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
296                                         &sort_status);
297                 }
298                 nmem_destroy(nmem);
299             }
300             return s;
301         }
302     return NULL;
303 }
304
305 ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname,
306                                 const char ***basenames, int *num_bases)
307 {
308     ZebraSet sset = resultSetGet(zh, setname);
309     if (!sset)
310         return ZEBRA_FAIL;
311     *basenames = sset->basenames;
312     *num_bases = sset->num_bases;
313     return ZEBRA_OK;
314
315 }
316
317 void resultSetInvalidate(ZebraHandle zh)
318 {
319     ZebraSet s = zh->sets;
320     
321     yaz_log(log_level_resultsets, "invalidating result sets");
322     for (; s; s = s->next)
323     {
324         if (s->rset)
325         {
326             if (s->cache_rfd)
327                 rset_close(s->cache_rfd);
328             rset_delete(s->rset);
329         }
330         s->rset = 0;
331         s->cache_rfd = 0;
332         s->cache_position = 0;
333         if (s->rset_nmem)
334             nmem_destroy(s->rset_nmem);
335         s->rset_nmem=0;
336     }
337 }
338
339 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
340 {
341     ZebraSet * ss = &zh->sets;
342     int i;
343     
344     if (statuses)
345         for (i = 0; i<num; i++)
346             statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
347     while (*ss)
348     {
349         int i = -1;
350         ZebraSet s = *ss;
351         if (num >= 0)
352         {
353             for (i = 0; i<num; i++)
354                 if (!strcmp(s->name, names[i]))
355                 {
356                     if (statuses)
357                         statuses[i] = Z_DeleteStatus_success;
358                     i = -1;
359                     break;
360                 }
361         }
362         if (i < 0)
363         {
364             *ss = s->next;
365             
366             xfree(s->sort_info->all_entries);
367             xfree(s->sort_info->entries);
368             xfree(s->sort_info);
369             
370             if (s->nmem)
371                 nmem_destroy(s->nmem);
372             if (s->rset)
373             {
374                 if (s->cache_rfd)
375                     rset_close(s->cache_rfd);
376                 rset_delete(s->rset);
377             }
378             if (s->rset_nmem)
379                 nmem_destroy(s->rset_nmem);
380             xfree(s->name);
381             xfree(s);
382         }
383         else
384             ss = &s->next;
385     }
386 }
387
388 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
389                                                  const char *name, 
390                                                  zint start, int num)
391 {
392     zint pos_small[10];
393     zint *pos = pos_small;
394     ZebraMetaRecord *mr;
395     int i;
396
397     if (num > 10000 || num <= 0)
398         return 0;
399
400     if (num > 10)
401         pos = xmalloc(sizeof(*pos) * num);
402     
403     for (i = 0; i<num; i++)
404         pos[i] = start+i;
405
406     mr = zebra_meta_records_create(zh, name, num, pos);
407     
408     if (num > 10)
409         xfree(pos);
410     return mr;
411 }
412
413 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name, 
414                                            int num, zint *positions)
415 {
416     ZebraSet sset;
417     ZebraMetaRecord *sr = 0;
418     RSET rset;
419     int i;
420     struct zset_sort_info *sort_info;
421     size_t sysno_mem_index = 0;
422
423     if (zh->m_staticrank)
424         sysno_mem_index = 1;
425
426     if (!log_level_set)
427         loglevels();
428     if (!(sset = resultSetGet(zh, name)))
429         return NULL;
430     if (!(rset = sset->rset))
431     {
432         if (!sset->term_entries)
433             return 0;
434         sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
435         for (i = 0; i<num; i++)
436         {
437             sr[i].sysno = 0;
438             sr[i].score = -1;
439             sr[i].term = 0;
440             sr[i].db = 0;
441
442             if (positions[i] <= sset->term_entries_max)
443             {
444                 sr[i].term = sset->term_entries[positions[i]-1].term;
445                 sr[i].db = sset->term_entries[positions[i]-1].db;
446             }
447         }
448     }
449     else
450     {
451         sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
452         for (i = 0; i<num; i++)
453         {
454             sr[i].sysno = 0;
455             sr[i].score = -1;
456             sr[i].term = 0;
457             sr[i].db = 0;
458         }
459         sort_info = sset->sort_info;
460         if (sort_info)
461         {
462             zint position;
463             
464             for (i = 0; i<num; i++)
465             {
466                 position = positions[i];
467                 if (position > 0 && position <= sort_info->num_entries)
468                 {
469                     yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
470                             " (sorted)", position);
471                     sr[i].sysno = sort_info->entries[position-1]->sysno;
472                     sr[i].score = sort_info->entries[position-1]->score;
473                 }
474             }
475         }
476         /* did we really get all entries using sort ? */
477         for (i = 0; i<num; i++)
478         {
479             if (!sr[i].sysno)
480                 break;
481         }
482         if (i < num) /* nope, get the rest, unsorted - sorry */
483         {
484             zint position = 0;
485             int num_i = 0;
486             zint psysno = 0;
487             RSFD rfd;
488             struct it_key key;
489             
490             if (sort_info)
491                 position = sort_info->num_entries;
492             while (num_i < num && positions[num_i] <= position)
493                 num_i++;
494             
495             if (sset->cache_rfd &&
496                 num_i < num && positions[num_i] > sset->cache_position)
497             {
498                 position = sset->cache_position;
499                 rfd = sset->cache_rfd;
500                 psysno = sset->cache_psysno;
501             }
502             else
503             {
504                 if (sset->cache_rfd)
505                     rset_close(sset->cache_rfd);
506                 rfd = rset_open(rset, RSETF_READ);
507             }
508             while (num_i < num && rset_read(rfd, &key, 0))
509             {
510                 zint this_sys = key.mem[sysno_mem_index];
511                 if (this_sys != psysno)
512                 {
513                     psysno = this_sys;
514                     if (sort_info)
515                     {
516                         /* determine we alreay have this in our set */
517                         for (i = sort_info->num_entries; --i >= 0; )
518                             if (psysno == sort_info->entries[i]->sysno)
519                                 break;
520                         if (i >= 0)
521                             continue;
522                     }
523                     position++;
524                     assert(num_i < num);
525                     if (position == positions[num_i])
526                     {
527                         sr[num_i].sysno = psysno;
528                         yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
529                         sr[num_i].score = -1;
530                         num_i++;
531                     }
532                 }
533             }
534             sset->cache_position = position;
535             sset->cache_psysno = psysno;
536             sset->cache_rfd = rfd;
537         }
538     }
539     return sr;
540 }
541
542 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
543                                 int num)
544 {
545     assert(zh); /* compiler shut up about unused arg */
546     xfree(records);
547 }
548
/* One sort criterion as used by resultSetInsertSort(). */
struct sortKeyInfo {
    int relation;           /* 'A' (ascending) or 'D' (descending) */
    int *ord;               /* per database searched: ord (file id) of the
                               sort index, -1 if there is none */
    int *numerical;         /* per database searched: non-zero if keys are
                               to be compared as numbers */
    const char *index_type; /* index type used to untranslate keys before a
                               numerical compare */
};
555
556 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
557                          struct sortKeyInfo *criteria, int num_criteria,
558                          zint sysno,
559                          char *cmp_buf[], char *tmp_cmp_buf[], int *cached_success_db)
560 {
561     struct zset_sort_entry *new_entry = NULL;
562     struct zset_sort_info *sort_info = sset->sort_info;
563     int i, j;
564     int scan_db,scan_count;
565     int numbases = zh->num_basenames;
566
567     zebra_sort_sysno(zh->reg->sort_index, sysno);
568     for (i = 0; i<num_criteria; i++)
569     {
570         char *this_entry_buf = tmp_cmp_buf[i];
571         memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
572         
573         /* if the first database doesn't have a sort index, 
574            we assume none of them will */
575         if (criteria[i].ord[0] != -1)
576         {
577             /* now make a best guess for the database in which we think
578                the record is located if its not in our best guess, try the
579                other databases one by one, till we had them all */
580             scan_db = *cached_success_db;
581             scan_count = 0;
582             
583             while (1)
584             {
585                 scan_count++;
586                 if (scan_count>numbases)
587                 {
588                     /* well...we scanned all databases and still nothing...give up */
589                     yaz_log(log_level_sort, "zebra_sort_read failed (record not found in indices)");
590                     break;
591                 }
592                 
593                 /* the criteria[i].ord is the file id of the sort index */
594                 yaz_log(log_level_sort, "pre zebra_sort_type ord is %d", criteria[i].ord[scan_db]);
595                 zebra_sort_type(zh->reg->sort_index, criteria[i].ord[scan_db]);
596                 if (zebra_sort_read(zh->reg->sort_index, this_entry_buf))
597                 {
598                     /* allright, found it */
599                     /* cache this db so we start trying from this db 
600                        for next record */
601                     *cached_success_db=scan_db;
602                     break;
603                 }
604                 else
605                 {
606                     yaz_log(log_level_sort, "record not found in database, trying next one");
607                     scan_db++;
608                     if (scan_db>=numbases)
609                         scan_db=0;
610                 }
611             }
612             
613         }
614         else
615         {
616             yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
617         }
618     }
619     i = sort_info->num_entries;
620     while (--i >= 0)
621     {
622         int rel = 0;
623         for (j = 0; j<num_criteria; j++)
624         {
625             char *this_entry_buf = tmp_cmp_buf[j];
626             char *other_entry_buf = 
627                 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
628             if (criteria[j].numerical[*cached_success_db])
629             {
630                 char this_entry_org[1024];
631                 char other_entry_org[1024];
632                 double diff;
633                 /* when searching multiple databases, we use the index
634                    type of the first one. So if they differ between
635                    databases, we have a problem here we could store the
636                    index_type for each database, but if we didn't find the
637                    record in any sort index, then we still don't know to
638                    which database it belongs. */
639                 const char *index_type = criteria[j].index_type;
640                 zebra_term_untrans(zh, index_type, this_entry_org,
641                                    this_entry_buf);
642                 zebra_term_untrans(zh, index_type, other_entry_org,
643                                    other_entry_buf);
644                 diff = atof(this_entry_org) - atof(other_entry_org);
645                 
646                 if (diff > 0.0)
647                     rel = 1;
648                 else if (diff < 0.0)
649                     rel = -1;
650                 else
651                     rel = 0;
652             }
653             else
654             {
655                 rel = memcmp(this_entry_buf, other_entry_buf,
656                              SORT_IDX_ENTRYSIZE);
657             }
658             /* when the compare is equal, continue to next criteria, 
659                else break out */
660             if (rel)
661                 break;
662         }       
663         if (!rel)
664             break;
665         if (criteria[j].relation == 'A')
666         {
667             if (rel > 0)
668                 break;
669         }
670         else if (criteria[j].relation == 'D')
671         {
672             if (rel < 0)
673                 break;
674         }
675     }
676     ++i;
677     yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
678     j = sort_info->max_entries;
679     if (i == j){
680         yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
681         return;
682     }
683
684     if (sort_info->num_entries == j)
685         --j;
686     else
687         j = (sort_info->num_entries)++;
688     new_entry = sort_info->entries[j];
689     /* move up all higher entries (to make room) */
690     while (j != i)
691     {
692         int k;
693         for (k = 0; k<num_criteria; k++)
694         {
695             char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
696             char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
697             memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
698         }
699         sort_info->entries[j] = sort_info->entries[j-1];
700         --j;
701     }
702     /* and insert the new entry at the correct place */
703     sort_info->entries[i] = new_entry;
704     assert(new_entry);
705     /* and add this to the compare buffer */
706     for (i = 0; i<num_criteria; i++)
707     {
708         char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
709         char *this_entry_buf = tmp_cmp_buf[i];
710         memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
711     }
712     new_entry->sysno = sysno;
713     new_entry->score = -1;
714 }
715
716 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
717                          zint sysno, int score, int relation)
718 {
719     struct zset_sort_entry *new_entry = NULL;
720     int i, j;
721     assert(zh); /* compiler shut up about unused arg */
722
723     i = sort_info->num_entries;
724     while (--i >= 0)
725     {
726         int rel = 0;
727
728         rel = score - sort_info->entries[i]->score;
729
730         if (relation == 'D')
731         {
732             if (rel >= 0)
733                 break;
734         }
735         else if (relation == 'A')
736         {
737             if (rel <= 0)
738                 break;
739         }
740     }
741     ++i;
742     j = sort_info->max_entries;
743     if (i == j)
744         return;
745
746     if (sort_info->num_entries == j)
747         --j;
748     else
749         j = (sort_info->num_entries)++;
750     
751     new_entry = sort_info->entries[j];
752     while (j != i)
753     {
754         sort_info->entries[j] = sort_info->entries[j-1];
755         --j;
756     }
757     sort_info->entries[i] = new_entry;
758     assert(new_entry);
759     new_entry->sysno = sysno;
760     new_entry->score = score;
761 }
762
763 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
764 {
765     Z_RPNQuery *dst = 0;
766     ODR encode = odr_createmem(ODR_ENCODE);
767     ODR decode = odr_createmem(ODR_DECODE);
768
769     if (z_RPNQuery(encode, &src, 0, 0))
770     {
771         int len;
772         char *buf = odr_getbuf(encode, &len, 0);
773
774         if (buf)
775         {
776             odr_setbuf(decode, buf, len, 0);
777             z_RPNQuery(decode, &dst, 0, 0);
778         }
779     }
780     nmem_transfer(nmem, decode->mem);
781     odr_destroy(encode);
782     odr_destroy(decode);
783     return dst;
784 }
785
786 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
787 {
788     Z_SortKeySpecList *dst = 0;
789     ODR encode = odr_createmem(ODR_ENCODE);
790     ODR decode = odr_createmem(ODR_DECODE);
791
792     if (z_SortKeySpecList(encode, &src, 0, 0))
793     {
794         int len;
795         char *buf = odr_getbuf(encode, &len, 0);
796
797         if (buf)
798         {
799             odr_setbuf(decode, buf, len, 0);
800             z_SortKeySpecList(decode, &dst, 0, 0);
801         }
802     }
803     nmem_transfer(nmem, decode->mem);
804     odr_destroy(encode);
805     odr_destroy(decode);
806     return dst;
807 }
808
809 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
810                         ZebraSet rset)
811 {
812     ZebraSet nset;
813     int i;
814
815     nset = resultSetAdd(zh, setname, 1);
816     if (!nset)
817         return 0;
818
819     nset->nmem = nmem_create();
820
821     nset->num_bases = rset->num_bases;
822     nset->basenames = 
823         nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
824     for (i = 0; i<rset->num_bases; i++)
825         nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
826
827     if (rset->rset)
828         nset->rset = rset_dup(rset->rset);
829     if (rset->rpn)
830         nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
831     return nset;
832 }
833
834 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
835                         int num_input_setnames, const char **input_setnames,
836                         const char *output_setname,
837                         Z_SortKeySpecList *sort_sequence, int *sort_status)
838 {
839     ZebraSet sset;
840     RSET rset;
841
842     if (num_input_setnames == 0)
843     {
844         zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
845         return ZEBRA_FAIL;
846     }
847     if (num_input_setnames > 1)
848     {
849         zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
850         return ZEBRA_FAIL;
851     }
852     if (!log_level_set)
853         loglevels();
854     yaz_log(log_level_sort, "result set sort input=%s output=%s",
855             *input_setnames, output_setname);
856     sset = resultSetGet(zh, input_setnames[0]);
857     if (!sset)
858     {
859         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
860                        input_setnames[0]);
861         return ZEBRA_FAIL;
862     }
863     if (!(rset = sset->rset))
864     {
865         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
866                        input_setnames[0]);
867         return ZEBRA_FAIL;
868     }
869     if (strcmp(output_setname, input_setnames[0]))
870         sset = resultSetClone(zh, output_setname, sset);
871     sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
872     return resultSetSortSingle(zh, nmem, sset, rset, sort_sequence,
873                                sort_status);
874 }
875
876 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
877                               ZebraSet sset, RSET rset,
878                               Z_SortKeySpecList *sort_sequence,
879                               int *sort_status)
880 {
881     int i;
882     int ib;
883     int cached_success_db = 0;
884     int n = 0;
885     zint kno = 0;
886     zint psysno = 0;
887     struct it_key key;
888     struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
889     char *cmp_buf[ZSET_SORT_MAX_LEVEL];
890     char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
891     int num_criteria;
892     RSFD rfd;
893     TERMID termid;
894     TERMID *terms;
895     int numTerms = 0;
896     size_t sysno_mem_index = 0;
897     
898     int numbases = zh->num_basenames;
899     yaz_log(log_level_sort, "searching %d databases",numbases);
900
901     if (zh->m_staticrank)
902         sysno_mem_index = 1;
903
904     assert(nmem); /* compiler shut up about unused param */
905     sset->sort_info->num_entries = 0;
906
907     rset_getterms(rset, 0, 0, &n);
908     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
909     rset_getterms(rset, terms, n, &numTerms);
910
911     sset->hits = 0;
912     num_criteria = sort_sequence->num_specs;
913     if (num_criteria > ZSET_SORT_MAX_LEVEL)
914         num_criteria = ZSET_SORT_MAX_LEVEL;
915     /* set up the search criteria */
916     for (i = 0; i < num_criteria; i++)
917     {
918         Z_SortKeySpec *sks = sort_sequence->specs[i];
919         Z_SortKey *sk;
920         ZEBRA_RES res;
921         
922         sort_criteria[i].ord = (int *)
923             nmem_malloc(nmem, sizeof(int)*numbases);
924         sort_criteria[i].numerical = (int *)
925             nmem_malloc(nmem, sizeof(int)*numbases);
926         
927         /* initialize ord and numerical for each database */
928         for (ib = 0; ib < numbases; ib++)
929         {
930             sort_criteria[i].ord[ib] = -1;
931             sort_criteria[i].numerical[ib] = 0;
932         }
933
934         if (sks->which == Z_SortKeySpec_missingValueData)
935         {
936             zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
937             return ZEBRA_FAIL;
938         }
939         if (*sks->sortRelation == Z_SortKeySpec_ascending)
940             sort_criteria[i].relation = 'A';
941         else if (*sks->sortRelation == Z_SortKeySpec_descending)
942             sort_criteria[i].relation = 'D';
943         else
944         {
945             zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
946             return ZEBRA_FAIL;
947         }
948         if (sks->sortElement->which == Z_SortElement_databaseSpecific)
949         {
950             zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
951             return ZEBRA_FAIL;
952         }
953         else if (sks->sortElement->which != Z_SortElement_generic)
954         {
955             zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
956             return ZEBRA_FAIL;
957         }       
958         sk = sks->sortElement->u.generic;
959         switch (sk->which)
960         {
961         case Z_SortKey_sortField:
962             yaz_log(log_level_sort, "key %d is of type sortField", i+1);
963             for (ib = 0; ib < numbases; ib++)
964             {
965                 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
966                 sort_criteria[i].numerical[ib] = 0;
967                 sort_criteria[i].ord[ib] = 
968                     zebraExplain_lookup_attr_str(zh->reg->zei,
969                                                  zinfo_index_category_sort,
970                                                  0, sk->u.sortField);
971                 if (sks->which != Z_SortKeySpec_null
972                     && sort_criteria[i].ord[ib] == -1)
973                 {
974                     zebra_setError(zh,
975                                    YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
976                     return ZEBRA_FAIL;
977                 }
978             }
979             break;
980         case Z_SortKey_elementSpec:
981             yaz_log(log_level_sort, "key %d is of type elementSpec", i+1);
982             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
983             return ZEBRA_FAIL;
984         case Z_SortKey_sortAttributes:
985             yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
986             /* for every database we searched, get the sort index file
987                id (ord) and its numerical indication and store them in
988                the sort_criteria */
989             for (ib = 0; ib < numbases; ib++)
990             {
991                 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
992                 res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
993                                          &sort_criteria[i].ord[ib],
994                                          &sort_criteria[i].numerical[ib]);
995             }
996             
997             if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
998                 return ZEBRA_FAIL;
999             break;
1000         }
1001         /* right now we look up the index type based on the first database
1002            if the index_type's can differ between the indexes of different
1003            databases (which i guess they can?) then we have to store the
1004            index types for each database, just like the ord and numerical */
1005         if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
1006                                     &sort_criteria[i].index_type,
1007                                     0, 0))
1008         {
1009             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
1010             return ZEBRA_FAIL;
1011         }
1012     }
1013     /* allocate space for each cmpare buf + one extra for tmp comparison */
1014     /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
1015        all other result entries to compare against. This is slowly filled when records are processed.
1016        tmp_cmp_buf is an array with a value of the current record for each criteria
1017     */
1018     for (i = 0; i<num_criteria; i++)
1019     {
1020         cmp_buf[i] = xmalloc(sset->sort_info->max_entries
1021                              * SORT_IDX_ENTRYSIZE);
1022         tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
1023     }
1024     rfd = rset_open(rset, RSETF_READ);
1025     while (rset_read(rfd, &key, &termid))
1026     {
1027         zint this_sys = key.mem[sysno_mem_index];
1028         if (log_level_searchhits)
1029             key_logdump_txt(log_level_searchhits, &key, termid->name);
1030         kno++;
1031         if (this_sys != psysno)
1032         {
1033             if ((sset->hits & 255) == 0 && zh->break_handler_func)
1034             {
1035                 if (zh->break_handler_func(zh->break_handler_data))
1036                 {
1037                     rset_set_hits_limit(rset, 0);
1038                     break;
1039                 }
1040             }
1041             (sset->hits)++;
1042             psysno = this_sys;
1043             resultSetInsertSort(zh, sset,
1044                                 sort_criteria, num_criteria, psysno, cmp_buf,
1045                                 tmp_cmp_buf, &cached_success_db);
1046         }
1047     }
1048     rset_close(rfd);
1049
1050     /* free the compare buffers */
1051     for (i = 0; i<num_criteria; i++)
1052     {
1053         xfree(cmp_buf[i]);
1054         xfree(tmp_cmp_buf[i]);
1055     }
1056
1057     yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
1058             kno, sset->hits);   
1059     for (i = 0; i < numTerms; i++)
1060         yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1061                 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1062     *sort_status = Z_SortResponse_success;
1063     return ZEBRA_OK;
1064 }
1065
1066 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1067 {
1068     ZebraSet s;
1069
1070     if ((s = resultSetGet(zh, resultSetId)))
1071         return s->rset;
1072     return NULL;
1073 }
1074
1075 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
1076                         RSET rset, NMEM nmem)
1077 {
1078     struct it_key key;
1079     TERMID termid;
1080     TERMID *terms;
1081     zint kno = 0;
1082     int numTerms = 0;
1083     int n = 0;
1084     int i;
1085     ZebraRankClass rank_class;
1086     struct zset_sort_info *sort_info;
1087     const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
1088     size_t sysno_mem_index = 0;
1089
1090     if (zh->m_staticrank)
1091         sysno_mem_index = 1;
1092
1093     if (!log_level_set)
1094         loglevels();
1095     sort_info = zebraSet->sort_info;
1096     sort_info->num_entries = 0;
1097     zebraSet->hits = 0;
1098     zebraSet->estimated_hit_count = 0;
1099     rset_getterms(rset, 0, 0, &n);
1100     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1101     rset_getterms(rset, terms, n, &numTerms);
1102
1103     rank_class = zebraRankLookup(zh, rank_handler_name);
1104     if (!rank_class)
1105     {
1106         yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1107         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1108         return ZEBRA_FAIL;
1109     }
1110     else
1111     {
1112         RSFD rfd = rset_open(rset, RSETF_READ);
1113         struct rank_control *rc = rank_class->control;
1114         int score;
1115         zint count = 0;
1116         void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1117                                      nmem, terms, numTerms);
1118         zint psysno = 0;  /* previous doc id / sys no */
1119         zint pstaticrank = 0; /* previous static rank */
1120         int stop_flag = 0;
1121         while (rset_read(rfd, &key, &termid))
1122         {
1123             zint this_sys = key.mem[sysno_mem_index];
1124
1125             zint seqno = key.mem[key.len-1];
1126             kno++;
1127             if (log_level_searchhits)
1128                 key_logdump_txt(log_level_searchhits, &key, termid->name);
1129             if (this_sys != psysno) 
1130             {   /* new record .. */
1131                 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1132                 {
1133                     if (zh->break_handler_func(zh->break_handler_data))
1134                     {
1135                         yaz_log(YLOG_LOG, "Aborted search");
1136                         stop_flag = 1;
1137                     }
1138                 }
1139                 if (rfd->counted_items > rset->hits_limit)
1140                     stop_flag = 1;
1141                 if (psysno)
1142                 {   /* only if we did have a previous record */
1143                     score = (*rc->calc)(handle, psysno, pstaticrank,
1144                                         &stop_flag);
1145                     /* insert the hit. A=Ascending */
1146                     resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1147                     count++;
1148                 }
1149                 if (stop_flag)
1150                 {
1151                     zebraSet->estimated_hit_count = 1;
1152                     rset_set_hits_limit(rset, 0);
1153                     break;
1154                 }
1155                 psysno = this_sys;
1156                 if (zh->m_staticrank)
1157                     pstaticrank = key.mem[0];
1158             }
1159             (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1160         }
1161         /* no more items */
1162         if (psysno)
1163         {   /* we had - at least - one record */
1164             score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1165             /* insert the hit. A=Ascending */
1166             resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1167             count++;
1168         }
1169         (*rc->end)(zh->reg, handle);
1170         rset_close(rfd);
1171     }
1172     zebraSet->hits = rset->hits_count;
1173
1174     yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1175             ZINT_FORMAT " sysnos, rank",  kno, zebraSet->hits);
1176     for (i = 0; i < numTerms; i++)
1177     {
1178         yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1179                 ZINT_FORMAT,
1180                 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1181     }
1182     return ZEBRA_OK;
1183 }
1184
1185 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1186 {
1187     ZebraRankClass p = zh->reg->rank_classes;
1188     while (p && strcmp(p->control->name, name))
1189         p = p->next;
1190     if (p && !p->init_flag)
1191     {
1192         if (p->control->create)
1193             p->class_handle = (*p->control->create)(zh);
1194         p->init_flag = 1;
1195     }
1196     return p;
1197 }
1198
1199 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1200 {
1201     ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1202     p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1203     memcpy(p->control, ctrl, sizeof(*p->control));
1204     p->control->name = xstrdup(ctrl->name);
1205     p->init_flag = 0;
1206     p->next = reg->rank_classes;
1207     reg->rank_classes = p;
1208 }
1209
1210 void zebraRankDestroy(struct zebra_register *reg)
1211 {
1212     ZebraRankClass p = reg->rank_classes;
1213     while (p)
1214     {
1215         ZebraRankClass p_next = p->next;
1216         if (p->init_flag && p->control->destroy)
1217             (*p->control->destroy)(reg, p->class_handle);
1218         xfree(p->control->name);
1219         xfree(p->control);
1220         xfree(p);
1221         p = p_next;
1222     }
1223     reg->rank_classes = NULL;
1224 }
1225
1226 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1227                                  zint *hits_array, int *approx_array)
1228 {
1229     int no = 0;
1230     int i;
1231     for (i = 0; i<rset->no_children; i++)
1232         no += trav_rset_for_termids(rset->children[i],
1233                                     (termid_array ? termid_array + no : 0),
1234                                     (hits_array ? hits_array + no : 0),
1235                                     (approx_array ? approx_array + no : 0));
1236     if (rset->term)
1237     {
1238         if (termid_array)
1239             termid_array[no] = rset->term;
1240         if (hits_array)
1241             hits_array[no] = rset->hits_count;
1242         if (approx_array)
1243             approx_array[no] = rset->hits_approx;
1244 #if 0
1245         yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1246                 " count=" ZINT_FORMAT,
1247                 rset, rset->term->name, rset->hits_limit, rset->hits_count);
1248 #endif
1249         no++;
1250     }
1251     return no;
1252 }
1253
1254 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1255                                    int *num_terms)
1256 {
1257     ZebraSet sset = resultSetGet(zh, setname);
1258     *num_terms = 0;
1259     if (sset)
1260     {
1261         *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1262         return ZEBRA_OK;
1263     }
1264     return ZEBRA_FAIL;
1265 }
1266
1267 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1268                                      int no, zint *count, int *approx,
1269                                      char *termbuf, size_t *termlen,
1270                                      const char **term_ref_id)
1271 {
1272     ZebraSet sset = resultSetGet(zh, setname);
1273     if (sset)
1274     {
1275         int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1276         if (no >= 0 && no < num_terms)
1277         {
1278             TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1279             zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1280             int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1281             
1282             trav_rset_for_termids(sset->rset, term_array,
1283                                   hits_array, approx_array);
1284
1285             if (count)
1286                 *count = hits_array[no];
1287             if (approx)
1288                 *approx = approx_array[no];
1289             if (termbuf)
1290             {
1291                 char *inbuf = term_array[no]->name;
1292                 size_t inleft = strlen(inbuf);
1293                 size_t outleft = *termlen - 1;
1294
1295                 if (zh->iconv_from_utf8 != 0)
1296                 {
1297                     char *outbuf = termbuf;
1298                     size_t ret;
1299                     
1300                     ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1301                                     &outbuf, &outleft);
1302                     if (ret == (size_t)(-1))
1303                         *termlen = 0;
1304                     else
1305                     {
1306                         yaz_iconv(zh->iconv_from_utf8, 0, 0, 
1307                                   &outbuf, &outleft);
1308                         *termlen = outbuf - termbuf;
1309                     }
1310                 }
1311                 else
1312                 {
1313                     if (inleft > outleft)
1314                         inleft = outleft;
1315                     *termlen = inleft;
1316                     memcpy(termbuf, inbuf, *termlen);
1317                 }
1318                 termbuf[*termlen] = '\0';
1319             }
1320             if (term_ref_id)
1321                 *term_ref_id = term_array[no]->ref_id;
1322
1323             xfree(term_array);
1324             xfree(hits_array);
1325             xfree(approx_array);
1326             return ZEBRA_OK;
1327         }
1328     }
1329     return ZEBRA_FAIL;
1330 }
1331
1332 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1333                                     zint sysno, zebra_snippets *snippets)
1334 {
1335     ZebraSet sset = resultSetGet(zh, setname);
1336     yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1337             setname, sysno);
1338     if (!sset)
1339         return ZEBRA_FAIL;
1340     else
1341     {
1342         struct rset_key_control *kc = zebra_key_control_create(zh);
1343         NMEM nmem = nmem_create();
1344         struct it_key key;
1345         RSET rsets[2], rset_comb;
1346         RSET rset_temp = rset_create_temp(nmem, kc, kc->scope, 
1347                                           res_get(zh->res, "setTmpDir"),0 );
1348         
1349         TERMID termid;
1350         RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1351         
1352         key.mem[0] = sysno;
1353         key.mem[1] = 0;
1354         key.mem[2] = 0;
1355         key.mem[3] = 0;
1356         key.len = 2;
1357         rset_write(rsfd, &key);
1358         rset_close(rsfd);
1359
1360         rsets[0] = rset_temp;
1361         rsets[1] = rset_dup(sset->rset);
1362         
1363         rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1364
1365         rsfd = rset_open(rset_comb, RSETF_READ);
1366
1367         while (rset_read(rsfd, &key, &termid))
1368         {
1369             if (termid)
1370             {
1371                 struct ord_list *ol;
1372                 for (ol = termid->ol; ol; ol = ol->next)
1373                 {
1374                     zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1375                                           ol->ord, termid->name);
1376                 }
1377             }
1378         }
1379         rset_close(rsfd);
1380         
1381         rset_delete(rset_comb);
1382         nmem_destroy(nmem);
1383         kc->dec(kc);
1384     }
1385     return ZEBRA_OK;
1386 }
1387
1388 static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh, 
1389                                       const char **basenames, int num_bases,
1390                                       zint recid,
1391                                       zint *sysnos, int *no_sysnos)
1392 {
1393     ZEBRA_RES res = ZEBRA_OK;
1394     int sysnos_offset = 0;
1395     int i;
1396     
1397     if (!zh->reg->isamb || !zh->m_segment_indexing)
1398     {
1399         if (sysnos_offset < *no_sysnos)
1400             *sysnos = recid;
1401         sysnos_offset++;
1402     }
1403     else
1404     {
1405         for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1406         {
1407             const char *database = basenames[i];
1408             if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1409             {
1410                 const char *index_type = "w";
1411                 const char *use_string = "_ALLRECORDS";
1412                 int ord;
1413                 zinfo_index_category_t cat = zinfo_index_category_alwaysmatches;
1414                 ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
1415                                                    index_type, use_string);
1416                 if (ord != -1)
1417                 {
1418                     char ord_buf[32];
1419                     int ord_len = key_SU_encode(ord, ord_buf);
1420                     char *info;
1421                 
1422                     ord_buf[ord_len] = '\0';
1423                 
1424                     info = dict_lookup(zh->reg->dict, ord_buf);
1425                     if (info)
1426                     {
1427                         if (*info != sizeof(ISAM_P))
1428                         {
1429                             res = ZEBRA_FAIL;
1430                         }
1431                         else
1432                         {
1433                             ISAM_P isam_p;
1434                             ISAMB_PP pt;
1435                             struct it_key key_until, key_found;
1436                             int i = 0;
1437                             int r;
1438                         
1439                             memcpy(&isam_p, info+1, sizeof(ISAM_P));
1440                         
1441                             pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
1442                             if (!pt)
1443                                 res = ZEBRA_FAIL;
1444                             else
1445                             {
1446                                 key_until.mem[i++] = recid;
1447                                 key_until.mem[i++] = 0;  /* section_id */
1448                                 if (zh->m_segment_indexing)
1449                                     key_until.mem[i++] = 0; /* segment */
1450                                 key_until.mem[i++] = 0;
1451                                 key_until.len = i;
1452                             
1453                                 r = isamb_pp_forward(pt, &key_found, &key_until);
1454                                 while (r && key_found.mem[0] == recid)
1455                                 {
1456                                     if (sysnos_offset < *no_sysnos)
1457                                         sysnos[sysnos_offset++] = 
1458                                             key_found.mem[key_found.len-1];
1459                                     r = isamb_pp_read(pt, &key_found);
1460                                 }
1461                                 isamb_pp_close(pt);
1462                             }
1463                         }
1464                     }
1465                 }
1466             }
1467         }
1468     }
1469     *no_sysnos = sysnos_offset;
1470     return res;
1471 }
1472
1473 ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh, 
1474                                       const char *setname,
1475                                       zint recid,
1476                                       zint *sysnos, int *no_sysnos)
1477 {
1478     const char **basenames;
1479     int num_bases;
1480     ZEBRA_RES res;
1481
1482     res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1483     if (res != ZEBRA_OK)
1484         return ZEBRA_FAIL;
1485
1486     return zebra_recid_to_sysno(zh, basenames, num_bases,
1487                                 recid, sysnos, no_sysnos);
1488 }
1489
1490 void zebra_count_set(ZebraHandle zh, RSET rset, zint *count,
1491                      zint approx_limit)
1492 {
1493     zint psysno = 0;
1494     struct it_key key;
1495     RSFD rfd;
1496
1497     yaz_log(YLOG_DEBUG, "count_set");
1498
1499     rset->hits_limit = approx_limit;
1500
1501     *count = 0;
1502     rfd = rset_open(rset, RSETF_READ);
1503     while (rset_read(rfd, &key,0 /* never mind terms */))
1504     {
1505         if (key.mem[0] != psysno)
1506         {
1507             psysno = key.mem[0];
1508             if (rfd->counted_items >= rset->hits_limit)
1509                 break;
1510         }
1511     }
1512     rset_close(rfd);
1513     *count = rset->hits_count;
1514 }
1515                    
1516
1517 /*
1518  * Local variables:
1519  * c-basic-offset: 4
1520  * indent-tabs-mode: nil
1521  * End:
1522  * vim: shiftwidth=4 tabstop=8 expandtab
1523  */
1524