Proper sort entry copying
[idzebra-moved-to-github.git] / index / zsets.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1995-2008 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20
21 #include <stdio.h>
22 #include <assert.h>
23 #ifdef WIN32
24 #include <io.h>
25 #else
26 #include <unistd.h>
27 #endif
28
29 #include "index.h"
30 #include "rank.h"
31 #include <yaz/diagbib1.h>
32 #include <rset.h>
33
34 #define ZSET_SORT_MAX_LEVEL 10
35
/* One term recorded in a result set by resultSetAddTerm(). */
struct zebra_set_term_entry {
    int reg_type;      /* register type as passed to resultSetAddTerm() */
    char *db;          /* database name (copy held in the set's nmem) */
    char *index_name;  /* index name (copy held in the set's nmem) */
    char *term;        /* term string (copy held in the set's nmem);
                          0 while the slot is unused */
};
42
43 struct zebra_set {
44     char *name;
45     RSET rset;
46     NMEM nmem;
47     NMEM rset_nmem; /* for creating the rsets in */
48     zint hits;
49     int num_bases;
50     const char **basenames;
51     Z_RPNQuery *rpn;
52     Z_SortKeySpecList *sortSpec;
53     struct zset_sort_info *sort_info;
54     struct zebra_set_term_entry *term_entries;
55     int term_entries_max;
56     struct zebra_set *next;
57     int locked;
58     int estimated_hit_count;
59
60     zint cache_position;  /* last position */
61     RSFD cache_rfd;       /* rfd (NULL if not existing) */
62     zint cache_psysno;    /* sysno for last position */
63     zint approx_limit;    /* limit before we do approx */
64 };
65
66 struct zset_sort_entry {
67     zint sysno;
68     int score;
69 };
70
/* Bounded, ordered list of the best entries of a result set. */
struct zset_sort_info {
    int max_entries;   /* capacity of both arrays below */
    int num_entries;   /* number of slots of 'entries' currently in use */
    struct zset_sort_entry *all_entries;  /* backing storage for entries */
    struct zset_sort_entry **entries;     /* pointers into all_entries,
                                             kept in sort order */
};
77
/* Log levels for this module; resolved once by loglevels(). */
static int log_level_set = 0;          /* non-zero once levels are resolved */
static int log_level_sort = 0;
static int log_level_searchhits = 0;
static int log_level_searchterms = 0;
static int log_level_resultsets = 0;

/* Look up the module log levels on first use; later calls do nothing. */
static void loglevels(void)
{
    if (!log_level_set)
    {
        log_level_sort = yaz_log_module_level("sorting");
        log_level_searchhits = yaz_log_module_level("searchhits");
        log_level_searchterms = yaz_log_module_level("searchterms");
        log_level_resultsets = yaz_log_module_level("resultsets");
        log_level_set = 1;
    }
}
94
95
96 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
97                                  Z_RPNQuery *rpn, ZebraSet sset)
98 {
99     RSET rset = 0;
100     Z_SortKeySpecList *sort_sequence;
101     int sort_status, i;
102     ZEBRA_RES res = ZEBRA_OK;
103
104     sort_sequence = (Z_SortKeySpecList *)
105         nmem_malloc(nmem, sizeof(*sort_sequence));
106     sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
107     sort_sequence->specs = (Z_SortKeySpec **)
108         nmem_malloc(nmem, sort_sequence->num_specs *
109                     sizeof(*sort_sequence->specs));
110     for (i = 0; i<sort_sequence->num_specs; i++)
111         sort_sequence->specs[i] = 0;
112     
113     rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
114
115     res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
116                          nmem, rset_nmem,
117                          sort_sequence,
118                          sset->num_bases, sset->basenames,
119                          &rset);
120     if (res != ZEBRA_OK)
121     {
122         sset->rset = 0;
123         return res;
124     }
125     for (i = 0; sort_sequence->specs[i]; i++)
126         ;
127     sort_sequence->num_specs = i;
128     rset->hits_limit = sset->approx_limit;
129     if (!i)
130     {
131         res = resultSetRank(zh, sset, rset, rset_nmem);
132     }
133     else
134     {
135         res = resultSetSortSingle(zh, nmem, sset, rset,
136                                   sort_sequence, &sort_status);
137     }
138     sset->rset = rset;
139     return res;
140 }
141
142
143 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
144                           int num_bases, char **basenames,
145                           const char *setname,
146                           zint *hits, int *estimated_hit_count)
147 {
148     ZebraSet zebraSet;
149     int i;
150     ZEBRA_RES res;
151
152     *hits = 0;
153     *estimated_hit_count = 0;
154
155     zebraSet = resultSetAdd(zh, setname, 1);
156     if (!zebraSet)
157         return ZEBRA_FAIL;
158     zebraSet->locked = 1;
159     zebraSet->rpn = 0;
160     zebraSet->nmem = m;
161     zebraSet->rset_nmem = nmem_create(); 
162
163     zebraSet->num_bases = num_bases;
164     zebraSet->basenames = 
165         nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
166     for (i = 0; i<num_bases; i++)
167         zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
168
169     res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
170                           rpn, zebraSet);
171     *hits = zebraSet->hits;
172     if (zebraSet->estimated_hit_count)
173         *estimated_hit_count = 1;
174
175     if (zebraSet->rset)
176         zebraSet->rpn = rpn;
177     zebraSet->locked = 0;
178     if (!zebraSet->rset)
179         return ZEBRA_FAIL;
180     return res;
181 }
182
183 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
184                       const char *db, const char *index_name, 
185                       const char *term)
186 {
187     assert(zh); /* compiler shut up */
188     if (!s->nmem)
189         s->nmem = nmem_create();
190     if (!s->term_entries)
191     {
192         int i;
193         s->term_entries_max = 1000;
194         s->term_entries =
195             nmem_malloc(s->nmem, s->term_entries_max * 
196                         sizeof(*s->term_entries));
197         for (i = 0; i < s->term_entries_max; i++)
198             s->term_entries[i].term = 0;
199     }
200     if (s->hits < s->term_entries_max)
201     {
202         s->term_entries[s->hits].reg_type = reg_type;
203         s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
204         s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
205         s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
206     }
207     (s->hits)++;
208 }
209
210 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
211 {
212     ZebraSet s;
213     int i;
214
215     for (s = zh->sets; s; s = s->next)
216         if (!strcmp(s->name, name))
217             break;
218     
219     if (!log_level_set)
220         loglevels();
221     if (s)
222     {
223         yaz_log(log_level_resultsets, "updating result set %s", name);
224         if (!ov || s->locked)
225             return NULL;
226         if (s->rset)
227         {
228             if (s->cache_rfd)
229                 rset_close(s->cache_rfd);
230             rset_delete(s->rset);
231         }
232         if (s->rset_nmem)
233             nmem_destroy(s->rset_nmem);
234         if (s->nmem)
235             nmem_destroy(s->nmem);
236     }
237     else
238     {
239         const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
240
241         yaz_log(log_level_resultsets, "adding result set %s", name);
242         s = (ZebraSet) xmalloc(sizeof(*s));
243         s->next = zh->sets;
244         zh->sets = s;
245         s->name = xstrdup(name);
246
247         s->sort_info = (struct zset_sort_info *)
248             xmalloc(sizeof(*s->sort_info));
249         s->sort_info->max_entries = atoi(sort_max_str);
250         if (s->sort_info->max_entries < 2)
251             s->sort_info->max_entries = 2;
252
253         s->sort_info->entries = (struct zset_sort_entry **)
254             xmalloc(sizeof(*s->sort_info->entries) *
255                     s->sort_info->max_entries);
256         s->sort_info->all_entries = (struct zset_sort_entry *)
257             xmalloc(sizeof(*s->sort_info->all_entries) *
258                     s->sort_info->max_entries);
259         for (i = 0; i < s->sort_info->max_entries; i++)
260             s->sort_info->entries[i] = s->sort_info->all_entries + i;
261     }
262     s->locked = 0;
263     s->term_entries = 0;
264     s->hits = 0;
265     s->rset = 0;
266     s->rset_nmem = 0;
267     s->nmem = 0;
268     s->rpn = 0;
269     s->sortSpec = 0;
270     s->cache_position = 0;
271     s->cache_rfd = 0;
272     s->approx_limit = zh->approx_limit;
273     s->estimated_hit_count = 0;
274     return s;
275 }
276
277 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
278 {
279     ZebraSet s;
280
281     for (s = zh->sets; s; s = s->next)
282         if (!strcmp(s->name, name))
283         {
284             if (!s->term_entries && !s->rset && s->rpn)
285             {
286                 NMEM nmem = nmem_create();
287                 yaz_log(log_level_resultsets, "research %s", name);
288                 if (!s->rset_nmem)
289                     s->rset_nmem = nmem_create();
290                 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
291                 if (s->rset && s->sortSpec)
292                 {
293                     int sort_status;
294                     yaz_log(log_level_resultsets, "resort %s", name);
295                     resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
296                                         &sort_status);
297                 }
298                 nmem_destroy(nmem);
299             }
300             return s;
301         }
302     return NULL;
303 }
304
305 ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname,
306                                 const char ***basenames, int *num_bases)
307 {
308     ZebraSet sset = resultSetGet(zh, setname);
309     if (!sset)
310         return ZEBRA_FAIL;
311     *basenames = sset->basenames;
312     *num_bases = sset->num_bases;
313     return ZEBRA_OK;
314
315 }
316
317 void resultSetInvalidate(ZebraHandle zh)
318 {
319     ZebraSet s = zh->sets;
320     
321     yaz_log(log_level_resultsets, "invalidating result sets");
322     for (; s; s = s->next)
323     {
324         if (s->rset)
325         {
326             if (s->cache_rfd)
327                 rset_close(s->cache_rfd);
328             rset_delete(s->rset);
329         }
330         s->rset = 0;
331         s->cache_rfd = 0;
332         s->cache_position = 0;
333         if (s->rset_nmem)
334             nmem_destroy(s->rset_nmem);
335         s->rset_nmem=0;
336     }
337 }
338
339 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
340 {
341     ZebraSet * ss = &zh->sets;
342     int i;
343     
344     if (statuses)
345         for (i = 0; i<num; i++)
346             statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
347     while (*ss)
348     {
349         int i = -1;
350         ZebraSet s = *ss;
351         if (num >= 0)
352         {
353             for (i = 0; i<num; i++)
354                 if (!strcmp(s->name, names[i]))
355                 {
356                     if (statuses)
357                         statuses[i] = Z_DeleteStatus_success;
358                     i = -1;
359                     break;
360                 }
361         }
362         if (i < 0)
363         {
364             *ss = s->next;
365             
366             xfree(s->sort_info->all_entries);
367             xfree(s->sort_info->entries);
368             xfree(s->sort_info);
369             
370             if (s->nmem)
371                 nmem_destroy(s->nmem);
372             if (s->rset)
373             {
374                 if (s->cache_rfd)
375                     rset_close(s->cache_rfd);
376                 rset_delete(s->rset);
377             }
378             if (s->rset_nmem)
379                 nmem_destroy(s->rset_nmem);
380             xfree(s->name);
381             xfree(s);
382         }
383         else
384             ss = &s->next;
385     }
386 }
387
388 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
389                                                  const char *name, 
390                                                  zint start, int num)
391 {
392     zint pos_small[10];
393     zint *pos = pos_small;
394     ZebraMetaRecord *mr;
395     int i;
396
397     if (num > 10000 || num <= 0)
398         return 0;
399
400     if (num > 10)
401         pos = xmalloc(sizeof(*pos) * num);
402     
403     for (i = 0; i<num; i++)
404         pos[i] = start+i;
405
406     mr = zebra_meta_records_create(zh, name, num, pos);
407     
408     if (num > 10)
409         xfree(pos);
410     return mr;
411 }
412
413 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name, 
414                                            int num, zint *positions)
415 {
416     ZebraSet sset;
417     ZebraMetaRecord *sr = 0;
418     RSET rset;
419     int i;
420     struct zset_sort_info *sort_info;
421     size_t sysno_mem_index = 0;
422
423     if (zh->m_staticrank)
424         sysno_mem_index = 1;
425
426     if (!log_level_set)
427         loglevels();
428     if (!(sset = resultSetGet(zh, name)))
429         return NULL;
430     if (!(rset = sset->rset))
431     {
432         if (!sset->term_entries)
433             return 0;
434         sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
435         for (i = 0; i<num; i++)
436         {
437             sr[i].sysno = 0;
438             sr[i].score = -1;
439             sr[i].term = 0;
440             sr[i].db = 0;
441
442             if (positions[i] <= sset->term_entries_max)
443             {
444                 sr[i].term = sset->term_entries[positions[i]-1].term;
445                 sr[i].db = sset->term_entries[positions[i]-1].db;
446             }
447         }
448     }
449     else
450     {
451         sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
452         for (i = 0; i<num; i++)
453         {
454             sr[i].sysno = 0;
455             sr[i].score = -1;
456             sr[i].term = 0;
457             sr[i].db = 0;
458         }
459         sort_info = sset->sort_info;
460         if (sort_info)
461         {
462             zint position;
463             
464             for (i = 0; i<num; i++)
465             {
466                 position = positions[i];
467                 if (position > 0 && position <= sort_info->num_entries)
468                 {
469                     yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
470                             " (sorted)", position);
471                     sr[i].sysno = sort_info->entries[position-1]->sysno;
472                     sr[i].score = sort_info->entries[position-1]->score;
473                 }
474             }
475         }
476         /* did we really get all entries using sort ? */
477         for (i = 0; i<num; i++)
478         {
479             if (!sr[i].sysno)
480                 break;
481         }
482         if (i < num) /* nope, get the rest, unsorted - sorry */
483         {
484             zint position = 0;
485             int num_i = 0;
486             zint psysno = 0;
487             RSFD rfd;
488             struct it_key key;
489             
490             if (sort_info)
491                 position = sort_info->num_entries;
492             while (num_i < num && positions[num_i] <= position)
493                 num_i++;
494             
495             if (sset->cache_rfd &&
496                 num_i < num && positions[num_i] > sset->cache_position)
497             {
498                 position = sset->cache_position;
499                 rfd = sset->cache_rfd;
500                 psysno = sset->cache_psysno;
501             }
502             else
503             {
504                 if (sset->cache_rfd)
505                     rset_close(sset->cache_rfd);
506                 rfd = rset_open(rset, RSETF_READ);
507             }
508             while (num_i < num && rset_read(rfd, &key, 0))
509             {
510                 zint this_sys = key.mem[sysno_mem_index];
511                 if (this_sys != psysno)
512                 {
513                     psysno = this_sys;
514                     if (sort_info)
515                     {
516                         /* determine we alreay have this in our set */
517                         for (i = sort_info->num_entries; --i >= 0; )
518                             if (psysno == sort_info->entries[i]->sysno)
519                                 break;
520                         if (i >= 0)
521                             continue;
522                     }
523                     position++;
524                     assert(num_i < num);
525                     if (position == positions[num_i])
526                     {
527                         sr[num_i].sysno = psysno;
528                         yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
529                         sr[num_i].score = -1;
530                         num_i++;
531                     }
532                 }
533             }
534             sset->cache_position = position;
535             sset->cache_psysno = psysno;
536             sset->cache_rfd = rfd;
537         }
538     }
539     return sr;
540 }
541
542 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
543                                 int num)
544 {
545     assert(zh); /* compiler shut up about unused arg */
546     xfree(records);
547 }
548
/* One sort criterion as used by resultSetInsertSort(). */
struct sortKeyInfo {
    int relation;           /* 'A' (ascending) or 'D' (descending) */
    int *ord;               /* sort index ord for each database searched;
                               -1 means no sort index for that database */
    int *numerical;         /* per database: non-zero to compare the keys
                               as numbers */
    const char *index_type; /* index type handed to zebra_term_untrans for
                               numerical comparison */
};
555
556 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
557                          int database_no,
558                          struct sortKeyInfo *criteria, int num_criteria,
559                          zint sysno,
560                          char *cmp_buf[], char *tmp_cmp_buf[])
561 {
562     struct zset_sort_entry *new_entry = NULL;
563     struct zset_sort_info *sort_info = sset->sort_info;
564     int i, j;
565     WRBUF w = wrbuf_alloc();
566
567     zebra_sort_sysno(zh->reg->sort_index, sysno);
568     for (i = 0; i<num_criteria; i++)
569     {
570         char *this_entry_buf = tmp_cmp_buf[i];
571         memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
572         
573         if (criteria[i].ord[database_no] != -1)
574         {
575             yaz_log(log_level_sort, "pre zebra_sort_type ord is %d",
576                     criteria[i].ord[database_no]);
577             zebra_sort_type(zh->reg->sort_index, criteria[i].ord[database_no]);
578             wrbuf_rewind(w);
579             if (zebra_sort_read(zh->reg->sort_index, w))
580             {
581                 int off = 0;
582                 while (off != wrbuf_len(w))
583                 {
584                     size_t l = strlen(wrbuf_buf(w)+off);
585                     assert(off < wrbuf_len(w));
586
587                     if (l >= SORT_IDX_ENTRYSIZE)
588                         l = SORT_IDX_ENTRYSIZE-1;
589                     if (off == 0)
590                     {
591                         memcpy(this_entry_buf, wrbuf_buf(w)+off, l);
592                         this_entry_buf[l] = '\0';
593                     }
594                     else if (criteria[i].relation == 'A')
595                     {
596                         if (strcmp(wrbuf_buf(w)+off, this_entry_buf) < 0)
597                         {
598                             memcpy(this_entry_buf, wrbuf_buf(w)+off, l);
599                             this_entry_buf[l] = '\0';
600                         }
601                     }
602                     else if (criteria[i].relation == 'D')
603                     {
604                         if (strcmp(wrbuf_buf(w)+off, this_entry_buf) > 0)
605                         {
606                             memcpy(this_entry_buf, wrbuf_buf(w)+off, l);
607                             this_entry_buf[l] = '\0';
608                         }
609                     }
610                     off += 1 + strlen(wrbuf_buf(w)+off);
611                 }
612             }
613         }
614         else
615         {
616             yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
617         }
618     }
619     wrbuf_destroy(w);
620     i = sort_info->num_entries;
621     while (--i >= 0)
622     {
623         int rel = 0;
624         for (j = 0; j<num_criteria; j++)
625         {
626             char *this_entry_buf = tmp_cmp_buf[j];
627             char *other_entry_buf = 
628                 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
629             if (criteria[j].numerical[database_no])
630             {
631                 char this_entry_org[1024];
632                 char other_entry_org[1024];
633                 double diff;
634                 /* when searching multiple databases, we use the index
635                    type of the first one. So if they differ between
636                    databases, we have a problem here we could store the
637                    index_type for each database, but if we didn't find the
638                    record in any sort index, then we still don't know to
639                    which database it belongs. */
640                 const char *index_type = criteria[j].index_type;
641                 zebra_term_untrans(zh, index_type, this_entry_org,
642                                    this_entry_buf);
643                 zebra_term_untrans(zh, index_type, other_entry_org,
644                                    other_entry_buf);
645                 diff = atof(this_entry_org) - atof(other_entry_org);
646                 
647                 if (diff > 0.0)
648                     rel = 1;
649                 else if (diff < 0.0)
650                     rel = -1;
651                 else
652                     rel = 0;
653             }
654             else
655             {
656                 rel = memcmp(this_entry_buf, other_entry_buf,
657                              SORT_IDX_ENTRYSIZE);
658             }
659             /* when the compare is equal, continue to next criteria, 
660                else break out */
661             if (rel)
662                 break;
663         }       
664         if (!rel)
665             break;
666         if (criteria[j].relation == 'A')
667         {
668             if (rel > 0)
669                 break;
670         }
671         else if (criteria[j].relation == 'D')
672         {
673             if (rel < 0)
674                 break;
675         }
676     }
677     ++i;
678     yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
679     j = sort_info->max_entries;
680     if (i == j){
681         yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
682         return;
683     }
684
685     if (sort_info->num_entries == j)
686         --j;
687     else
688         j = (sort_info->num_entries)++;
689     new_entry = sort_info->entries[j];
690     /* move up all higher entries (to make room) */
691     while (j != i)
692     {
693         int k;
694         for (k = 0; k<num_criteria; k++)
695         {
696             char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
697             char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
698             memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
699         }
700         sort_info->entries[j] = sort_info->entries[j-1];
701         --j;
702     }
703     /* and insert the new entry at the correct place */
704     sort_info->entries[i] = new_entry;
705     assert(new_entry);
706     /* and add this to the compare buffer */
707     for (i = 0; i<num_criteria; i++)
708     {
709         char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
710         char *this_entry_buf = tmp_cmp_buf[i];
711         memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
712     }
713     new_entry->sysno = sysno;
714     new_entry->score = -1;
715 }
716
717 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
718                          zint sysno, int score, int relation)
719 {
720     struct zset_sort_entry *new_entry = NULL;
721     int i, j;
722     assert(zh); /* compiler shut up about unused arg */
723
724     i = sort_info->num_entries;
725     while (--i >= 0)
726     {
727         int rel = 0;
728
729         rel = score - sort_info->entries[i]->score;
730
731         if (relation == 'D')
732         {
733             if (rel >= 0)
734                 break;
735         }
736         else if (relation == 'A')
737         {
738             if (rel <= 0)
739                 break;
740         }
741     }
742     ++i;
743     j = sort_info->max_entries;
744     if (i == j)
745         return;
746
747     if (sort_info->num_entries == j)
748         --j;
749     else
750         j = (sort_info->num_entries)++;
751     
752     new_entry = sort_info->entries[j];
753     while (j != i)
754     {
755         sort_info->entries[j] = sort_info->entries[j-1];
756         --j;
757     }
758     sort_info->entries[i] = new_entry;
759     assert(new_entry);
760     new_entry->sysno = sysno;
761     new_entry->score = score;
762 }
763
764 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
765 {
766     Z_RPNQuery *dst = 0;
767     ODR encode = odr_createmem(ODR_ENCODE);
768     ODR decode = odr_createmem(ODR_DECODE);
769
770     if (z_RPNQuery(encode, &src, 0, 0))
771     {
772         int len;
773         char *buf = odr_getbuf(encode, &len, 0);
774
775         if (buf)
776         {
777             odr_setbuf(decode, buf, len, 0);
778             z_RPNQuery(decode, &dst, 0, 0);
779         }
780     }
781     nmem_transfer(nmem, decode->mem);
782     odr_destroy(encode);
783     odr_destroy(decode);
784     return dst;
785 }
786
787 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
788 {
789     Z_SortKeySpecList *dst = 0;
790     ODR encode = odr_createmem(ODR_ENCODE);
791     ODR decode = odr_createmem(ODR_DECODE);
792
793     if (z_SortKeySpecList(encode, &src, 0, 0))
794     {
795         int len;
796         char *buf = odr_getbuf(encode, &len, 0);
797
798         if (buf)
799         {
800             odr_setbuf(decode, buf, len, 0);
801             z_SortKeySpecList(decode, &dst, 0, 0);
802         }
803     }
804     nmem_transfer(nmem, decode->mem);
805     odr_destroy(encode);
806     odr_destroy(decode);
807     return dst;
808 }
809
810 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
811                         ZebraSet rset)
812 {
813     ZebraSet nset;
814     int i;
815
816     nset = resultSetAdd(zh, setname, 1);
817     if (!nset)
818         return 0;
819
820     nset->nmem = nmem_create();
821
822     nset->num_bases = rset->num_bases;
823     nset->basenames = 
824         nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
825     for (i = 0; i<rset->num_bases; i++)
826         nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
827
828     if (rset->rset)
829         nset->rset = rset_dup(rset->rset);
830     if (rset->rpn)
831         nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
832     return nset;
833 }
834
835 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
836                         int num_input_setnames, const char **input_setnames,
837                         const char *output_setname,
838                         Z_SortKeySpecList *sort_sequence, int *sort_status)
839 {
840     ZebraSet sset;
841     RSET rset;
842
843     if (num_input_setnames == 0)
844     {
845         zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
846         return ZEBRA_FAIL;
847     }
848     if (num_input_setnames > 1)
849     {
850         zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
851         return ZEBRA_FAIL;
852     }
853     if (!log_level_set)
854         loglevels();
855     yaz_log(log_level_sort, "result set sort input=%s output=%s",
856             *input_setnames, output_setname);
857     sset = resultSetGet(zh, input_setnames[0]);
858     if (!sset)
859     {
860         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
861                        input_setnames[0]);
862         return ZEBRA_FAIL;
863     }
864     if (!(rset = sset->rset))
865     {
866         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
867                        input_setnames[0]);
868         return ZEBRA_FAIL;
869     }
870     if (strcmp(output_setname, input_setnames[0]))
871         sset = resultSetClone(zh, output_setname, sset);
872     sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
873     return resultSetSortSingle(zh, nmem, sset, rset, sort_sequence,
874                                sort_status);
875 }
876
877 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
878                               ZebraSet sset, RSET rset,
879                               Z_SortKeySpecList *sort_sequence,
880                               int *sort_status)
881 {
882     int i;
883     int ib;
884     int n = 0;
885     zint kno = 0;
886     zint psysno = 0;
887     struct it_key key;
888     struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
889     char *cmp_buf[ZSET_SORT_MAX_LEVEL];
890     char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
891     int num_criteria;
892     RSFD rfd;
893     TERMID termid;
894     TERMID *terms;
895     int numTerms = 0;
896     size_t sysno_mem_index = 0;
897     
898     int numbases = zh->num_basenames;
899     yaz_log(log_level_sort, "searching %d databases",numbases);
900
901     if (zh->m_staticrank)
902         sysno_mem_index = 1;
903
904     assert(nmem); /* compiler shut up about unused param */
905     sset->sort_info->num_entries = 0;
906
907     rset_getterms(rset, 0, 0, &n);
908     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
909     rset_getterms(rset, terms, n, &numTerms);
910
911     sset->hits = 0;
912     num_criteria = sort_sequence->num_specs;
913     if (num_criteria > ZSET_SORT_MAX_LEVEL)
914         num_criteria = ZSET_SORT_MAX_LEVEL;
915     /* set up the search criteria */
916     for (i = 0; i < num_criteria; i++)
917     {
918         Z_SortKeySpec *sks = sort_sequence->specs[i];
919         Z_SortKey *sk;
920         ZEBRA_RES res;
921         
922         sort_criteria[i].ord = (int *)
923             nmem_malloc(nmem, sizeof(int)*numbases);
924         sort_criteria[i].numerical = (int *)
925             nmem_malloc(nmem, sizeof(int)*numbases);
926         
927         /* initialize ord and numerical for each database */
928         for (ib = 0; ib < numbases; ib++)
929         {
930             sort_criteria[i].ord[ib] = -1;
931             sort_criteria[i].numerical[ib] = 0;
932         }
933
934         if (sks->which == Z_SortKeySpec_missingValueData)
935         {
936             zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
937             return ZEBRA_FAIL;
938         }
939         if (*sks->sortRelation == Z_SortKeySpec_ascending)
940             sort_criteria[i].relation = 'A';
941         else if (*sks->sortRelation == Z_SortKeySpec_descending)
942             sort_criteria[i].relation = 'D';
943         else
944         {
945             zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
946             return ZEBRA_FAIL;
947         }
948         if (sks->sortElement->which == Z_SortElement_databaseSpecific)
949         {
950             zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
951             return ZEBRA_FAIL;
952         }
953         else if (sks->sortElement->which != Z_SortElement_generic)
954         {
955             zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
956             return ZEBRA_FAIL;
957         }       
958         sk = sks->sortElement->u.generic;
959         switch (sk->which)
960         {
961         case Z_SortKey_sortField:
962             yaz_log(log_level_sort, "key %d is of type sortField", i+1);
963             for (ib = 0; ib < numbases; ib++)
964             {
965                 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
966                 sort_criteria[i].numerical[ib] = 0;
967                 sort_criteria[i].ord[ib] = 
968                     zebraExplain_lookup_attr_str(zh->reg->zei,
969                                                  zinfo_index_category_sort,
970                                                  0, sk->u.sortField);
971                 if (sks->which != Z_SortKeySpec_null
972                     && sort_criteria[i].ord[ib] == -1)
973                 {
974                     zebra_setError(zh,
975                                    YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
976                     return ZEBRA_FAIL;
977                 }
978             }
979             break;
980         case Z_SortKey_elementSpec:
981             yaz_log(log_level_sort, "key %d is of type elementSpec", i+1);
982             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
983             return ZEBRA_FAIL;
984         case Z_SortKey_sortAttributes:
985             yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
986             /* for every database we searched, get the sort index file
987                id (ord) and its numerical indication and store them in
988                the sort_criteria */
989             for (ib = 0; ib < numbases; ib++)
990             {
991                 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
992                 res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
993                                          &sort_criteria[i].ord[ib],
994                                          &sort_criteria[i].numerical[ib]);
995             }
996             
997             if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
998                 return ZEBRA_FAIL;
999             break;
1000         }
1001         /* right now we look up the index type based on the first database
1002            if the index_type's can differ between the indexes of different
1003            databases (which i guess they can?) then we have to store the
1004            index types for each database, just like the ord and numerical */
1005         if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
1006                                     &sort_criteria[i].index_type,
1007                                     0, 0))
1008         {
1009             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
1010             return ZEBRA_FAIL;
1011         }
1012     }
1013     /* allocate space for each cmpare buf + one extra for tmp comparison */
1014     /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
1015        all other result entries to compare against. This is slowly filled when records are processed.
1016        tmp_cmp_buf is an array with a value of the current record for each criteria
1017     */
1018     for (i = 0; i<num_criteria; i++)
1019     {
1020         cmp_buf[i] = xmalloc(sset->sort_info->max_entries
1021                              * SORT_IDX_ENTRYSIZE);
1022         tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
1023     }
1024     rfd = rset_open(rset, RSETF_READ);
1025     while (rset_read(rfd, &key, &termid))
1026     {
1027         zint this_sys = key.mem[sysno_mem_index];
1028         if (log_level_searchhits)
1029             key_logdump_txt(log_level_searchhits, &key, termid->name);
1030         kno++;
1031         if (this_sys != psysno)
1032         {
1033             int database_no = 0;
1034             if ((sset->hits & 255) == 0 && zh->break_handler_func)
1035             {
1036                 if (zh->break_handler_func(zh->break_handler_data))
1037                 {
1038                     rset_set_hits_limit(rset, 0);
1039                     break;
1040                 }
1041             }
1042             (sset->hits)++;
1043             psysno = this_sys;
1044
1045             /* determine database from the term, but only bother if more than
1046                one database is in use*/
1047             if (numbases > 1 && termid->ol)
1048             {
1049                 const char *this_db = 0;
1050                 if (zebraExplain_lookup_ord(zh->reg->zei, termid->ol->ord,  0, &this_db, 0)
1051                     == 0 && this_db)
1052                 {
1053                     for (ib = 0; ib < numbases; ib++)
1054                         if (!strcmp(this_db, zh->basenames[ib]))
1055                             database_no = ib;
1056                 }
1057             }
1058 #if 0
1059             yaz_log(YLOG_LOG, "sysno=" ZINT_FORMAT " database_no=%d", this_sys,
1060                 database_no);
1061             ord_list_print(termid->ol);
1062 #endif
1063             resultSetInsertSort(zh, sset, database_no,
1064                                 sort_criteria, num_criteria, psysno, cmp_buf,
1065                                 tmp_cmp_buf);
1066         }
1067     }
1068     rset_close(rfd);
1069
1070     /* free the compare buffers */
1071     for (i = 0; i<num_criteria; i++)
1072     {
1073         xfree(cmp_buf[i]);
1074         xfree(tmp_cmp_buf[i]);
1075     }
1076
1077     yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
1078             kno, sset->hits);   
1079     for (i = 0; i < numTerms; i++)
1080         yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1081                 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1082     *sort_status = Z_SortResponse_success;
1083     return ZEBRA_OK;
1084 }
1085
1086 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1087 {
1088     ZebraSet s;
1089
1090     if ((s = resultSetGet(zh, resultSetId)))
1091         return s->rset;
1092     return NULL;
1093 }
1094
1095 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
1096                         RSET rset, NMEM nmem)
1097 {
1098     struct it_key key;
1099     TERMID termid;
1100     TERMID *terms;
1101     zint kno = 0;
1102     int numTerms = 0;
1103     int n = 0;
1104     int i;
1105     ZebraRankClass rank_class;
1106     struct zset_sort_info *sort_info;
1107     const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
1108     size_t sysno_mem_index = 0;
1109
1110     if (zh->m_staticrank)
1111         sysno_mem_index = 1;
1112
1113     if (!log_level_set)
1114         loglevels();
1115     sort_info = zebraSet->sort_info;
1116     sort_info->num_entries = 0;
1117     zebraSet->hits = 0;
1118     zebraSet->estimated_hit_count = 0;
1119     rset_getterms(rset, 0, 0, &n);
1120     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1121     rset_getterms(rset, terms, n, &numTerms);
1122
1123     rank_class = zebraRankLookup(zh, rank_handler_name);
1124     if (!rank_class)
1125     {
1126         yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1127         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1128         return ZEBRA_FAIL;
1129     }
1130     else
1131     {
1132         RSFD rfd = rset_open(rset, RSETF_READ);
1133         struct rank_control *rc = rank_class->control;
1134         int score;
1135         zint count = 0;
1136         void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1137                                      nmem, terms, numTerms);
1138         zint psysno = 0;  /* previous doc id / sys no */
1139         zint pstaticrank = 0; /* previous static rank */
1140         int stop_flag = 0;
1141         while (rset_read(rfd, &key, &termid))
1142         {
1143             zint this_sys = key.mem[sysno_mem_index];
1144
1145             zint seqno = key.mem[key.len-1];
1146             kno++;
1147             if (log_level_searchhits)
1148                 key_logdump_txt(log_level_searchhits, &key, termid->name);
1149             if (this_sys != psysno) 
1150             {   /* new record .. */
1151                 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1152                 {
1153                     if (zh->break_handler_func(zh->break_handler_data))
1154                     {
1155                         yaz_log(YLOG_LOG, "Aborted search");
1156                         stop_flag = 1;
1157                     }
1158                 }
1159                 if (rfd->counted_items > rset->hits_limit)
1160                     stop_flag = 1;
1161                 if (psysno)
1162                 {   /* only if we did have a previous record */
1163                     score = (*rc->calc)(handle, psysno, pstaticrank,
1164                                         &stop_flag);
1165                     /* insert the hit. A=Ascending */
1166                     resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1167                     count++;
1168                 }
1169                 if (stop_flag)
1170                 {
1171                     zebraSet->estimated_hit_count = 1;
1172                     rset_set_hits_limit(rset, 0);
1173                     break;
1174                 }
1175                 psysno = this_sys;
1176                 if (zh->m_staticrank)
1177                     pstaticrank = key.mem[0];
1178             }
1179             (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1180         }
1181         /* no more items */
1182         if (psysno)
1183         {   /* we had - at least - one record */
1184             score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1185             /* insert the hit. A=Ascending */
1186             resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1187             count++;
1188         }
1189         (*rc->end)(zh->reg, handle);
1190         rset_close(rfd);
1191     }
1192     zebraSet->hits = rset->hits_count;
1193
1194     yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1195             ZINT_FORMAT " sysnos, rank",  kno, zebraSet->hits);
1196     for (i = 0; i < numTerms; i++)
1197     {
1198         yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1199                 ZINT_FORMAT,
1200                 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1201     }
1202     return ZEBRA_OK;
1203 }
1204
1205 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1206 {
1207     ZebraRankClass p = zh->reg->rank_classes;
1208     while (p && strcmp(p->control->name, name))
1209         p = p->next;
1210     if (p && !p->init_flag)
1211     {
1212         if (p->control->create)
1213             p->class_handle = (*p->control->create)(zh);
1214         p->init_flag = 1;
1215     }
1216     return p;
1217 }
1218
1219 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1220 {
1221     ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1222     p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1223     memcpy(p->control, ctrl, sizeof(*p->control));
1224     p->control->name = xstrdup(ctrl->name);
1225     p->init_flag = 0;
1226     p->next = reg->rank_classes;
1227     reg->rank_classes = p;
1228 }
1229
1230 void zebraRankDestroy(struct zebra_register *reg)
1231 {
1232     ZebraRankClass p = reg->rank_classes;
1233     while (p)
1234     {
1235         ZebraRankClass p_next = p->next;
1236         if (p->init_flag && p->control->destroy)
1237             (*p->control->destroy)(reg, p->class_handle);
1238         xfree(p->control->name);
1239         xfree(p->control);
1240         xfree(p);
1241         p = p_next;
1242     }
1243     reg->rank_classes = NULL;
1244 }
1245
1246 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1247                                  zint *hits_array, int *approx_array)
1248 {
1249     int no = 0;
1250     int i;
1251     for (i = 0; i<rset->no_children; i++)
1252         no += trav_rset_for_termids(rset->children[i],
1253                                     (termid_array ? termid_array + no : 0),
1254                                     (hits_array ? hits_array + no : 0),
1255                                     (approx_array ? approx_array + no : 0));
1256     if (rset->term)
1257     {
1258         if (termid_array)
1259             termid_array[no] = rset->term;
1260         if (hits_array)
1261             hits_array[no] = rset->hits_count;
1262         if (approx_array)
1263             approx_array[no] = rset->hits_approx;
1264 #if 0
1265         yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1266                 " count=" ZINT_FORMAT,
1267                 rset, rset->term->name, rset->hits_limit, rset->hits_count);
1268 #endif
1269         no++;
1270     }
1271     return no;
1272 }
1273
1274 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1275                                    int *num_terms)
1276 {
1277     ZebraSet sset = resultSetGet(zh, setname);
1278     *num_terms = 0;
1279     if (sset)
1280     {
1281         *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1282         return ZEBRA_OK;
1283     }
1284     return ZEBRA_FAIL;
1285 }
1286
1287 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1288                                      int no, zint *count, int *approx,
1289                                      char *termbuf, size_t *termlen,
1290                                      const char **term_ref_id)
1291 {
1292     ZebraSet sset = resultSetGet(zh, setname);
1293     if (sset)
1294     {
1295         int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1296         if (no >= 0 && no < num_terms)
1297         {
1298             TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1299             zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1300             int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1301             
1302             trav_rset_for_termids(sset->rset, term_array,
1303                                   hits_array, approx_array);
1304
1305             if (count)
1306                 *count = hits_array[no];
1307             if (approx)
1308                 *approx = approx_array[no];
1309             if (termbuf)
1310             {
1311                 char *inbuf = term_array[no]->name;
1312                 size_t inleft = strlen(inbuf);
1313                 size_t outleft = *termlen - 1;
1314
1315                 if (zh->iconv_from_utf8 != 0)
1316                 {
1317                     char *outbuf = termbuf;
1318                     size_t ret;
1319                     
1320                     ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1321                                     &outbuf, &outleft);
1322                     if (ret == (size_t)(-1))
1323                         *termlen = 0;
1324                     else
1325                     {
1326                         yaz_iconv(zh->iconv_from_utf8, 0, 0, 
1327                                   &outbuf, &outleft);
1328                         *termlen = outbuf - termbuf;
1329                     }
1330                 }
1331                 else
1332                 {
1333                     if (inleft > outleft)
1334                         inleft = outleft;
1335                     *termlen = inleft;
1336                     memcpy(termbuf, inbuf, *termlen);
1337                 }
1338                 termbuf[*termlen] = '\0';
1339             }
1340             if (term_ref_id)
1341                 *term_ref_id = term_array[no]->ref_id;
1342
1343             xfree(term_array);
1344             xfree(hits_array);
1345             xfree(approx_array);
1346             return ZEBRA_OK;
1347         }
1348     }
1349     return ZEBRA_FAIL;
1350 }
1351
1352 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1353                                     zint sysno, zebra_snippets *snippets)
1354 {
1355     ZebraSet sset = resultSetGet(zh, setname);
1356     yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1357             setname, sysno);
1358     if (!sset)
1359         return ZEBRA_FAIL;
1360     else
1361     {
1362         struct rset_key_control *kc = zebra_key_control_create(zh);
1363         NMEM nmem = nmem_create();
1364         struct it_key key;
1365         RSET rsets[2], rset_comb;
1366         RSET rset_temp = rset_create_temp(nmem, kc, kc->scope, 
1367                                           res_get(zh->res, "setTmpDir"),0 );
1368         
1369         TERMID termid;
1370         RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1371         
1372         key.mem[0] = sysno;
1373         key.mem[1] = 0;
1374         key.mem[2] = 0;
1375         key.mem[3] = 0;
1376         key.len = 2;
1377         rset_write(rsfd, &key);
1378         rset_close(rsfd);
1379
1380         rsets[0] = rset_temp;
1381         rsets[1] = rset_dup(sset->rset);
1382         
1383         rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1384
1385         rsfd = rset_open(rset_comb, RSETF_READ);
1386
1387         while (rset_read(rsfd, &key, &termid))
1388         {
1389             if (termid)
1390             {
1391                 struct ord_list *ol;
1392                 for (ol = termid->ol; ol; ol = ol->next)
1393                 {
1394                     zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1395                                           ol->ord, termid->name);
1396                 }
1397             }
1398         }
1399         rset_close(rsfd);
1400         
1401         rset_delete(rset_comb);
1402         nmem_destroy(nmem);
1403         kc->dec(kc);
1404     }
1405     return ZEBRA_OK;
1406 }
1407
1408 static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh, 
1409                                       const char **basenames, int num_bases,
1410                                       zint recid,
1411                                       zint *sysnos, int *no_sysnos)
1412 {
1413     ZEBRA_RES res = ZEBRA_OK;
1414     int sysnos_offset = 0;
1415     int i;
1416     
1417     if (!zh->reg->isamb || !zh->m_segment_indexing)
1418     {
1419         if (sysnos_offset < *no_sysnos)
1420             *sysnos = recid;
1421         sysnos_offset++;
1422     }
1423     else
1424     {
1425         for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1426         {
1427             const char *database = basenames[i];
1428             if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1429             {
1430                 const char *index_type = "w";
1431                 const char *use_string = "_ALLRECORDS";
1432                 int ord;
1433                 zinfo_index_category_t cat = zinfo_index_category_alwaysmatches;
1434                 ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
1435                                                    index_type, use_string);
1436                 if (ord != -1)
1437                 {
1438                     char ord_buf[32];
1439                     int ord_len = key_SU_encode(ord, ord_buf);
1440                     char *info;
1441                 
1442                     ord_buf[ord_len] = '\0';
1443                 
1444                     info = dict_lookup(zh->reg->dict, ord_buf);
1445                     if (info)
1446                     {
1447                         if (*info != sizeof(ISAM_P))
1448                         {
1449                             res = ZEBRA_FAIL;
1450                         }
1451                         else
1452                         {
1453                             ISAM_P isam_p;
1454                             ISAMB_PP pt;
1455                             struct it_key key_until, key_found;
1456                             int i = 0;
1457                             int r;
1458                         
1459                             memcpy(&isam_p, info+1, sizeof(ISAM_P));
1460                         
1461                             pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
1462                             if (!pt)
1463                                 res = ZEBRA_FAIL;
1464                             else
1465                             {
1466                                 key_until.mem[i++] = recid;
1467                                 key_until.mem[i++] = 0;  /* section_id */
1468                                 if (zh->m_segment_indexing)
1469                                     key_until.mem[i++] = 0; /* segment */
1470                                 key_until.mem[i++] = 0;
1471                                 key_until.len = i;
1472                             
1473                                 r = isamb_pp_forward(pt, &key_found, &key_until);
1474                                 while (r && key_found.mem[0] == recid)
1475                                 {
1476                                     if (sysnos_offset < *no_sysnos)
1477                                         sysnos[sysnos_offset++] = 
1478                                             key_found.mem[key_found.len-1];
1479                                     r = isamb_pp_read(pt, &key_found);
1480                                 }
1481                                 isamb_pp_close(pt);
1482                             }
1483                         }
1484                     }
1485                 }
1486             }
1487         }
1488     }
1489     *no_sysnos = sysnos_offset;
1490     return res;
1491 }
1492
1493 ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh, 
1494                                       const char *setname,
1495                                       zint recid,
1496                                       zint *sysnos, int *no_sysnos)
1497 {
1498     const char **basenames;
1499     int num_bases;
1500     ZEBRA_RES res;
1501
1502     res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1503     if (res != ZEBRA_OK)
1504         return ZEBRA_FAIL;
1505
1506     return zebra_recid_to_sysno(zh, basenames, num_bases,
1507                                 recid, sysnos, no_sysnos);
1508 }
1509
1510 void zebra_count_set(ZebraHandle zh, RSET rset, zint *count,
1511                      zint approx_limit)
1512 {
1513     zint psysno = 0;
1514     struct it_key key;
1515     RSFD rfd;
1516
1517     yaz_log(YLOG_DEBUG, "count_set");
1518
1519     rset->hits_limit = approx_limit;
1520
1521     *count = 0;
1522     rfd = rset_open(rset, RSETF_READ);
1523     while (rset_read(rfd, &key,0 /* never mind terms */))
1524     {
1525         if (key.mem[0] != psysno)
1526         {
1527             psysno = key.mem[0];
1528             if (rfd->counted_items >= rset->hits_limit)
1529                 break;
1530         }
1531     }
1532     rset_close(rfd);
1533     *count = rset->hits_count;
1534 }
1535                    
1536
1537 /*
1538  * Local variables:
1539  * c-basic-offset: 4
1540  * indent-tabs-mode: nil
1541  * End:
1542  * vim: shiftwidth=4 tabstop=8 expandtab
1543  */
1544