For estimated hit counts, Zebra returns resultSetStatus=estimate as
[idzebra-moved-to-github.git] / index / zsets.c
1 /* $Id: zsets.c,v 1.116 2007-01-16 15:01:15 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31
32 #include "index.h"
33 #include "rank.h"
34 #include <yaz/diagbib1.h>
35 #include <rset.h>
36
/* Maximum number of sort criteria processed per sort; resultSetSortSingle
   sizes its per-criterion arrays with it and clamps the spec count to it. */
37 #define ZSET_SORT_MAX_LEVEL 10
38
/* One term recorded in a result set by resultSetAddTerm(). */
39 struct zebra_set_term_entry {
    /* register type as passed to resultSetAddTerm() */
40     int reg_type;
    /* database name (copy allocated in the set's nmem) */
41     char *db;
    /* index name (copy allocated in the set's nmem) */
42     char *index_name;
    /* term string; set to 0 for every slot when the table is first allocated */
43     char *term;
44 };
45
/* A named result set; all sets of a handle are chained through `next`
   starting at zh->sets. */
46 struct zebra_set {
    /* set name; compared with strcmp() on lookup */
47     char *name;
    /* underlying record set, 0 when absent or invalidated */
48     RSET rset;
    /* pool holding data owned by the set (base names, term entries,
       copied sort spec) */
49     NMEM nmem;
50     NMEM rset_nmem; /* for creating the rsets in */
    /* hit count: bumped per distinct sysno by resultSetSortSingle() and
       per recorded term by resultSetAddTerm() */
51     zint hits;
    /* number of entries in basenames */
52     int num_bases;
    /* database names the set was searched in */
53     char **basenames;
    /* query kept so resultSetGet() can redo the search after invalidation */
54     Z_RPNQuery *rpn;
    /* sort specification re-applied by resultSetGet() after a re-search */
55     Z_SortKeySpecList *sortSpec;
    /* bounded window of sorted/ranked entries */
56     struct zset_sort_info *sort_info;
    /* term table, allocated lazily by resultSetAddTerm() */
57     struct zebra_set_term_entry *term_entries;
    /* capacity of term_entries */
58     int term_entries_max;
    /* next set in the handle's list */
59     struct zebra_set *next;
    /* nonzero while resultSetAddRPN() is searching; resultSetAdd() refuses
       to replace a locked set */
60     int locked;
    /* nonzero makes resultSetAddRPN() report the hit count as estimated */
61     int estimated_hit_count;
62
63     zint cache_position;  /* last position */
64     RSFD cache_rfd;       /* rfd (NULL if not existing) */
65     zint cache_psysno;    /* sysno for last position */
66     zint approx_limit;    /* limit before we do approx */
67 };
68
/* One entry of the sorted/ranked window of a result set. */
69 struct zset_sort_entry {
    /* record system number */
70     zint sysno;
    /* rank score; resultSetInsertSort() stores -1 here */
71     int score;
72 };
73
/* Bounded, ordered window over the records of a result set. */
74 struct zset_sort_info {
    /* capacity; taken from the "sortmax" resource, at least 2 */
75     int max_entries;
    /* number of slots of `entries` currently in use */
76     int num_entries;
    /* backing storage for max_entries entries */
77     struct zset_sort_entry *all_entries;
    /* pointers into all_entries, kept in sorted order by the insert
       functions */
78     struct zset_sort_entry **entries;
79 };
80
/* Module log levels, resolved once by loglevels(); log_level_set is the
   "already resolved" flag, the others hold the level for each log module. */
81 static int log_level_set=0;
82 static int log_level_sort=0;
83 static int log_level_searchhits=0;
84 static int log_level_searchterms=0;
85 static int log_level_resultsets=0;
86
87 static void loglevels(void)
88 {
89     if (log_level_set)
90         return;
91     log_level_sort = yaz_log_module_level("sorting");
92     log_level_searchhits = yaz_log_module_level("searchhits");
93     log_level_searchterms = yaz_log_module_level("searchterms");
94     log_level_resultsets = yaz_log_module_level("resultsets");
95     log_level_set = 1;
96 }
97
98
99 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
100                                  Z_RPNQuery *rpn, ZebraSet sset)
101 {
102     RSET rset = 0;
103     oident *attrset;
104     Z_SortKeySpecList *sort_sequence;
105     int sort_status, i;
106     ZEBRA_RES res = ZEBRA_OK;
107
108     sort_sequence = (Z_SortKeySpecList *)
109         nmem_malloc(nmem, sizeof(*sort_sequence));
110     sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
111     sort_sequence->specs = (Z_SortKeySpec **)
112         nmem_malloc(nmem, sort_sequence->num_specs *
113                      sizeof(*sort_sequence->specs));
114     for (i = 0; i<sort_sequence->num_specs; i++)
115         sort_sequence->specs[i] = 0;
116     
117     attrset = oid_getentbyoid (rpn->attributeSetId);
118
119     rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
120
121     res = rpn_search_top(zh, rpn->RPNStructure, attrset->value,
122                          nmem, rset_nmem,
123                          sort_sequence,
124                          sset->num_bases, sset->basenames,
125                          &rset);
126     if (res != ZEBRA_OK)
127     {
128         sset->rset = 0;
129         return res;
130     }
131     for (i = 0; sort_sequence->specs[i]; i++)
132         ;
133     sort_sequence->num_specs = i;
134     rset->hits_limit = sset->approx_limit;
135     if (!i)
136     {
137         res = resultSetRank (zh, sset, rset, rset_nmem);
138     }
139     else
140     {
141         res = resultSetSortSingle (zh, nmem, sset, rset,
142                                    sort_sequence, &sort_status);
143     }
144     sset->rset = rset;
145     return res;
146 }
147
148
149 ZEBRA_RES resultSetAddRPN (ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
150                            int num_bases, char **basenames,
151                            const char *setname,
152                            zint *hits, int *estimated_hit_count,
153                            int *partial_resultset)
154 {
155     ZebraSet zebraSet;
156     int i;
157     ZEBRA_RES res;
158
159     *hits = 0;
160     *estimated_hit_count = 0;
161     *partial_resultset = 0;
162
163     zebraSet = resultSetAdd(zh, setname, 1);
164     if (!zebraSet)
165         return ZEBRA_FAIL;
166     zebraSet->locked = 1;
167     zebraSet->rpn = 0;
168     zebraSet->nmem = m;
169     zebraSet->rset_nmem = nmem_create(); 
170
171     zebraSet->num_bases = num_bases;
172     zebraSet->basenames = 
173         nmem_malloc (zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
174     for (i = 0; i<num_bases; i++)
175         zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
176
177     res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
178                           rpn, zebraSet);
179     *hits = zebraSet->hits;
180     if (zebraSet->estimated_hit_count)
181         *estimated_hit_count = 1;
182
183     if (zebraSet->rset)
184         zebraSet->rpn = rpn;
185     zebraSet->locked = 0;
186     if (!zebraSet->rset)
187         return ZEBRA_FAIL;
188     return res;
189 }
190
191 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
192                       const char *db, const char *index_name, 
193                       const char *term)
194 {
195     assert(zh); /* compiler shut up */
196     if (!s->nmem)
197         s->nmem = nmem_create ();
198     if (!s->term_entries)
199     {
200         int i;
201         s->term_entries_max = 1000;
202         s->term_entries =
203             nmem_malloc (s->nmem, s->term_entries_max * 
204                          sizeof(*s->term_entries));
205         for (i = 0; i < s->term_entries_max; i++)
206             s->term_entries[i].term = 0;
207     }
208     if (s->hits < s->term_entries_max)
209     {
210         s->term_entries[s->hits].reg_type = reg_type;
211         s->term_entries[s->hits].db = nmem_strdup (s->nmem, db);
212         s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
213         s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
214     }
215     (s->hits)++;
216 }
217
218 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
219 {
220     ZebraSet s;
221     int i;
222
223     for (s = zh->sets; s; s = s->next)
224         if (!strcmp (s->name, name))
225             break;
226     
227     if (!log_level_set)
228         loglevels();
229     if (s)
230     {
231         yaz_log(log_level_resultsets, "updating result set %s", name);
232         if (!ov || s->locked)
233             return NULL;
234         if (s->rset)
235         {
236             if (s->cache_rfd)
237                 rset_close(s->cache_rfd);
238             rset_delete (s->rset);
239         }
240         if (s->rset_nmem)
241             nmem_destroy (s->rset_nmem);
242         if (s->nmem)
243             nmem_destroy (s->nmem);
244     }
245     else
246     {
247         const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
248
249         yaz_log(log_level_resultsets, "adding result set %s", name);
250         s = (ZebraSet) xmalloc (sizeof(*s));
251         s->next = zh->sets;
252         zh->sets = s;
253         s->name = (char *) xmalloc (strlen(name)+1);
254         strcpy (s->name, name);
255
256         s->sort_info = (struct zset_sort_info *)
257             xmalloc (sizeof(*s->sort_info));
258         s->sort_info->max_entries = atoi(sort_max_str);
259         if (s->sort_info->max_entries < 2)
260             s->sort_info->max_entries = 2;
261
262         s->sort_info->entries = (struct zset_sort_entry **)
263             xmalloc (sizeof(*s->sort_info->entries) *
264                      s->sort_info->max_entries);
265         s->sort_info->all_entries = (struct zset_sort_entry *)
266             xmalloc (sizeof(*s->sort_info->all_entries) *
267                      s->sort_info->max_entries);
268         for (i = 0; i < s->sort_info->max_entries; i++)
269             s->sort_info->entries[i] = s->sort_info->all_entries + i;
270     }
271     s->locked = 0;
272     s->term_entries = 0;
273     s->hits = 0;
274     s->rset = 0;
275     s->rset_nmem = 0;
276     s->nmem = 0;
277     s->rpn = 0;
278     s->sortSpec = 0;
279     s->cache_position = 0;
280     s->cache_rfd = 0;
281     s->approx_limit = zh->approx_limit;
282     s->estimated_hit_count = 0;
283     return s;
284 }
285
286 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
287 {
288     ZebraSet s;
289
290     for (s = zh->sets; s; s = s->next)
291         if (!strcmp (s->name, name))
292         {
293             if (!s->term_entries && !s->rset && s->rpn)
294             {
295                 NMEM nmem = nmem_create ();
296                 yaz_log(log_level_resultsets, "research %s", name);
297                 if (!s->rset_nmem)
298                     s->rset_nmem=nmem_create();
299                 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
300                 if (s->rset && s->sortSpec)
301                 {
302                     int sort_status;
303                     yaz_log(log_level_resultsets, "resort %s", name);
304                     resultSetSortSingle (zh, nmem, s, s->rset, s->sortSpec,
305                                          &sort_status);
306                 }
307                 nmem_destroy (nmem);
308             }
309             return s;
310         }
311     return NULL;
312 }
313
314 void resultSetInvalidate (ZebraHandle zh)
315 {
316     ZebraSet s = zh->sets;
317     
318     yaz_log(log_level_resultsets, "invalidating result sets");
319     for (; s; s = s->next)
320     {
321         if (s->rset)
322         {
323             if (s->cache_rfd)
324                 rset_close(s->cache_rfd);
325             rset_delete (s->rset);
326         }
327         s->rset = 0;
328         s->cache_rfd = 0;
329         s->cache_position = 0;
330         if (s->rset_nmem)
331             nmem_destroy(s->rset_nmem);
332         s->rset_nmem=0;
333     }
334 }
335
336 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
337 {
338     ZebraSet * ss = &zh->sets;
339     int i;
340     
341     if (statuses)
342         for (i = 0; i<num; i++)
343             statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
344     while (*ss)
345     {
346         int i = -1;
347         ZebraSet s = *ss;
348         if (num >= 0)
349         {
350             for (i = 0; i<num; i++)
351                 if (!strcmp (s->name, names[i]))
352                 {
353                     if (statuses)
354                         statuses[i] = Z_DeleteStatus_success;
355                     i = -1;
356                     break;
357                 }
358         }
359         if (i < 0)
360         {
361             *ss = s->next;
362             
363             xfree (s->sort_info->all_entries);
364             xfree (s->sort_info->entries);
365             xfree (s->sort_info);
366             
367             if (s->nmem)
368                 nmem_destroy (s->nmem);
369             if (s->rset)
370             {
371                 if (s->cache_rfd)
372                     rset_close(s->cache_rfd);
373                 rset_delete (s->rset);
374             }
375             if (s->rset_nmem)
376                 nmem_destroy(s->rset_nmem);
377             xfree (s->name);
378             xfree (s);
379         }
380         else
381             ss = &s->next;
382     }
383 }
384
385 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
386                                                  const char *name, 
387                                                  zint start, int num)
388 {
389     zint pos_small[10];
390     zint *pos = pos_small;
391     ZebraMetaRecord *mr;
392     int i;
393
394     if (num > 10000 || num <= 0)
395         return 0;
396
397     if (num > 10)
398         pos = xmalloc(sizeof(*pos) * num);
399     
400     for (i = 0; i<num; i++)
401         pos[i] = start+i;
402
403     mr = zebra_meta_records_create(zh, name, num, pos);
404     
405     if (num > 10)
406         xfree(pos);
407     return mr;
408 }
409
410 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name, 
411                                            int num, zint *positions)
412 {
413     ZebraSet sset;
414     ZebraMetaRecord *sr = 0;
415     RSET rset;
416     int i;
417     struct zset_sort_info *sort_info;
418     size_t sysno_mem_index = 0;
419
420     if (zh->m_staticrank)
421         sysno_mem_index = 1;
422
423     if (!log_level_set)
424         loglevels();
425     if (!(sset = resultSetGet (zh, name)))
426         return NULL;
427     if (!(rset = sset->rset))
428     {
429         if (!sset->term_entries)
430             return 0;
431         sr = (ZebraMetaRecord *) xmalloc (sizeof(*sr) * num);
432         for (i = 0; i<num; i++)
433         {
434             sr[i].sysno = 0;
435             sr[i].score = -1;
436             sr[i].term = 0;
437             sr[i].db = 0;
438
439             if (positions[i] <= sset->term_entries_max)
440             {
441                 sr[i].term = sset->term_entries[positions[i]-1].term;
442                 sr[i].db = sset->term_entries[positions[i]-1].db;
443             }
444         }
445     }
446     else
447     {
448         sr = (ZebraMetaRecord *) xmalloc (sizeof(*sr) * num);
449         for (i = 0; i<num; i++)
450         {
451             sr[i].sysno = 0;
452             sr[i].score = -1;
453             sr[i].term = 0;
454             sr[i].db = 0;
455         }
456         sort_info = sset->sort_info;
457         if (sort_info)
458         {
459             zint position;
460             
461             for (i = 0; i<num; i++)
462             {
463                 position = positions[i];
464                 if (position > 0 && position <= sort_info->num_entries)
465                 {
466                     yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
467                             " (sorted)", position);
468                     sr[i].sysno = sort_info->entries[position-1]->sysno;
469                     sr[i].score = sort_info->entries[position-1]->score;
470                 }
471             }
472         }
473         /* did we really get all entries using sort ? */
474         for (i = 0; i<num; i++)
475         {
476             if (!sr[i].sysno)
477                 break;
478         }
479         if (i < num) /* nope, get the rest, unsorted - sorry */
480         {
481             zint position = 0;
482             int num_i = 0;
483             zint psysno = 0;
484             RSFD rfd;
485             struct it_key key;
486             
487             if (sort_info)
488                 position = sort_info->num_entries;
489             while (num_i < num && positions[num_i] <= position)
490                 num_i++;
491             
492             if (sset->cache_rfd &&
493                 num_i < num && positions[num_i] > sset->cache_position)
494             {
495                 position = sset->cache_position;
496                 rfd = sset->cache_rfd;
497                 psysno = sset->cache_psysno;
498             }
499             else
500             {
501                 if (sset->cache_rfd)
502                     rset_close(sset->cache_rfd);
503                 rfd = rset_open (rset, RSETF_READ);
504             }
505             while (num_i < num && rset_read (rfd, &key, 0))
506             {
507                 zint this_sys = key.mem[sysno_mem_index];
508                 if (this_sys != psysno)
509                 {
510                     psysno = this_sys;
511                     if (sort_info)
512                     {
513                         /* determine we alreay have this in our set */
514                         for (i = sort_info->num_entries; --i >= 0; )
515                             if (psysno == sort_info->entries[i]->sysno)
516                                 break;
517                         if (i >= 0)
518                             continue;
519                     }
520                     position++;
521                     assert (num_i < num);
522                     if (position == positions[num_i])
523                     {
524                         sr[num_i].sysno = psysno;
525                         yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
526                         sr[num_i].score = -1;
527                         num_i++;
528                     }
529                 }
530             }
531             sset->cache_position = position;
532             sset->cache_psysno = psysno;
533             sset->cache_rfd = rfd;
534         }
535     }
536     return sr;
537 }
538
539 void zebra_meta_records_destroy (ZebraHandle zh, ZebraMetaRecord *records,
540                                  int num)
541 {
542     assert(zh); /* compiler shut up about unused arg */
543     xfree (records);
544 }
545
/* One sort criterion as prepared by resultSetSortSingle(). */
546 struct sortKeyInfo {
    /* 'A' or 'D', derived from the spec's ascending/descending relation */
547     int relation;
    /* ordinal of the sort index to read, or -1 when none was resolved */
548     int ord;
    /* nonzero: compare as numbers (atof) instead of with memcmp */
549     int numerical;
    /* filled by zebraExplain_lookup_ord(); passed to zebra_term_untrans() */
550     int index_type;
551 };
552
/*
 * Insert record `sysno` into the sorted window of `sset`.
 *
 * The record's sort key for each criterion is read into tmp_cmp_buf[k]
 * (left zeroed when the criterion has ord == -1).  The window is scanned
 * from its last entry towards the first, comparing against the keys kept
 * in cmp_buf[k] (one SORT_IDX_ENTRYSIZE slot per window entry), to find the
 * insertion index.  Later entries and their keys are shifted down by one
 * and the new entry is stored with score -1.  When the insertion index
 * equals max_entries the record is dropped; when the window is already full
 * the last entry is overwritten by the shift.
 */
553 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
554                          struct sortKeyInfo *criteria, int num_criteria,
555                          zint sysno,
556                          char *cmp_buf[], char *tmp_cmp_buf[])
557 {
558     struct zset_sort_entry *new_entry = NULL;
559     struct zset_sort_info *sort_info = sset->sort_info;
560     int i, j;
561
    /* fetch this record's key for every criterion */
562     zebra_sort_sysno(zh->reg->sort_index, sysno);
563     for (i = 0; i<num_criteria; i++)
564     {
565         char *this_entry_buf = tmp_cmp_buf[i];
566         memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
567         if (criteria[i].ord != -1)
568         {
569             zebra_sort_type(zh->reg->sort_index, criteria[i].ord);
570             zebra_sort_read(zh->reg->sort_index, this_entry_buf);
571         }
572     }
    /* walk the window from its last entry to its first */
573     i = sort_info->num_entries;
574     while (--i >= 0)
575     {
576         int rel = 0;
        /* rel: comparison result of the first criterion that differs */
577         for (j = 0; j<num_criteria; j++)
578         {
579             char *this_entry_buf = tmp_cmp_buf[j];
580             char *other_entry_buf = 
581                 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
582             if (criteria[j].numerical)
583             {
584                 char this_entry_org[1024];
585                 char other_entry_org[1024];
586                 double diff;
587                 int index_type = criteria[j].index_type;
588                 zebra_term_untrans(zh, index_type, this_entry_org,
589                                    this_entry_buf);
590                 zebra_term_untrans(zh, index_type, other_entry_org,
591                                    other_entry_buf);
592                 diff = atof(this_entry_org) - atof(other_entry_org);
593                 
594                 if (diff > 0.0)
595                     rel = 1;
596                 else if (diff < 0.0)
597                     rel = -1;
598                 else
599                     rel = 0;
600             }
601             else
602             {
603                 rel = memcmp(this_entry_buf, other_entry_buf,
604                              SORT_IDX_ENTRYSIZE);
605             }
606             if (rel)
607                 break;
608         }       
        /* all keys equal: insert right after entry i */
609         if (!rel)
610             break;
        /* j indexes the criterion that decided the comparison */
611         if (criteria[j].relation == 'A')
612         {
613             if (rel > 0)
614                 break;
615         }
616         else if (criteria[j].relation == 'D')
617         {
618             if (rel < 0)
619                 break;
620         }
621     }
    /* i becomes the insertion index */
622     ++i;
623     j = sort_info->max_entries;
    /* window is full and the record sorts after everything kept: drop it */
624     if (i == j)
625         return;
626
    /* full window: reuse the last slot; otherwise grow by one */
627     if (sort_info->num_entries == j)
628         --j;
629     else
630         j = (sort_info->num_entries)++;
631     new_entry = sort_info->entries[j];
    /* shift entries and their stored keys down to open slot i */
632     while (j != i)
633     {
634         int k;
635         for (k = 0; k<num_criteria; k++)
636         {
637             char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
638             char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
639             memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
640         }
641         sort_info->entries[j] = sort_info->entries[j-1];
642         --j;
643     }
644     sort_info->entries[i] = new_entry;
645     assert (new_entry);
    /* the shift loop ended with j equal to the insertion index, so j (not
       the reused loop counter i) addresses the new entry's key slots */
646     for (i = 0; i<num_criteria; i++)
647     {
648         char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
649         char *this_entry_buf = tmp_cmp_buf[i];
650         memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
651     }
652     new_entry->sysno = sysno;
653     new_entry->score = -1;
654 }
655
656 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
657                          zint sysno, int score, int relation)
658 {
659     struct zset_sort_entry *new_entry = NULL;
660     int i, j;
661     assert(zh); /* compiler shut up about unused arg */
662
663     i = sort_info->num_entries;
664     while (--i >= 0)
665     {
666         int rel = 0;
667
668         rel = score - sort_info->entries[i]->score;
669
670         if (relation == 'D')
671         {
672             if (rel >= 0)
673                 break;
674         }
675         else if (relation == 'A')
676         {
677             if (rel <= 0)
678                 break;
679         }
680     }
681     ++i;
682     j = sort_info->max_entries;
683     if (i == j)
684         return;
685
686     if (sort_info->num_entries == j)
687         --j;
688     else
689         j = (sort_info->num_entries)++;
690     
691     new_entry = sort_info->entries[j];
692     while (j != i)
693     {
694         sort_info->entries[j] = sort_info->entries[j-1];
695         --j;
696     }
697     sort_info->entries[i] = new_entry;
698     assert (new_entry);
699     new_entry->sysno = sysno;
700     new_entry->score = score;
701 }
702
703 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
704 {
705     Z_RPNQuery *dst = 0;
706     ODR encode = odr_createmem(ODR_ENCODE);
707     ODR decode = odr_createmem(ODR_DECODE);
708
709     if (z_RPNQuery(encode, &src, 0, 0))
710     {
711         int len;
712         char *buf = odr_getbuf(encode, &len, 0);
713
714         if (buf)
715         {
716             odr_setbuf(decode, buf, len, 0);
717             z_RPNQuery(decode, &dst, 0, 0);
718         }
719     }
720     nmem_transfer(nmem, decode->mem);
721     odr_destroy(encode);
722     odr_destroy(decode);
723     return dst;
724 }
725
726 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
727 {
728     Z_SortKeySpecList *dst = 0;
729     ODR encode = odr_createmem(ODR_ENCODE);
730     ODR decode = odr_createmem(ODR_DECODE);
731
732     if (z_SortKeySpecList(encode, &src, 0, 0))
733     {
734         int len;
735         char *buf = odr_getbuf(encode, &len, 0);
736
737         if (buf)
738         {
739             odr_setbuf(decode, buf, len, 0);
740             z_SortKeySpecList(decode, &dst, 0, 0);
741         }
742     }
743     nmem_transfer(nmem, decode->mem);
744     odr_destroy(encode);
745     odr_destroy(decode);
746     return dst;
747 }
748
749 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
750                         ZebraSet rset)
751 {
752     ZebraSet nset;
753     int i;
754
755     nset = resultSetAdd(zh, setname, 1);
756     if (!nset)
757         return 0;
758
759     nset->nmem = nmem_create();
760
761     nset->num_bases = rset->num_bases;
762     nset->basenames = 
763         nmem_malloc (nset->nmem, nset->num_bases * sizeof(*rset->basenames));
764     for (i = 0; i<rset->num_bases; i++)
765         nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
766
767     if (rset->rset)
768         nset->rset = rset_dup(rset->rset);
769     if (rset->rpn)
770         nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
771     return nset;
772 }
773
774 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
775                         int num_input_setnames, const char **input_setnames,
776                         const char *output_setname,
777                         Z_SortKeySpecList *sort_sequence, int *sort_status)
778 {
779     ZebraSet sset;
780     RSET rset;
781
782     if (num_input_setnames == 0)
783     {
784         zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
785         return ZEBRA_FAIL;
786     }
787     if (num_input_setnames > 1)
788     {
789         zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
790         return ZEBRA_FAIL;
791     }
792     if (!log_level_set)
793         loglevels();
794     yaz_log(log_level_sort, "result set sort input=%s output=%s",
795           *input_setnames, output_setname);
796     sset = resultSetGet (zh, input_setnames[0]);
797     if (!sset)
798     {
799         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
800                        input_setnames[0]);
801         return ZEBRA_FAIL;
802     }
803     if (!(rset = sset->rset))
804     {
805         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
806                        input_setnames[0]);
807         return ZEBRA_FAIL;
808     }
809     if (strcmp (output_setname, input_setnames[0]))
810         sset = resultSetClone(zh, output_setname, sset);
811     sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
812     return resultSetSortSingle (zh, nmem, sset, rset, sort_sequence,
813                                 sort_status);
814 }
815
/*
 * Sort the records of `rset` according to `sort_sequence` into
 * sset->sort_info and count the distinct records in sset->hits.
 *
 * At most ZSET_SORT_MAX_LEVEL sort key specs are used.  Each is translated
 * into a sortKeyInfo; an unsupported or unresolvable spec sets a Bib-1
 * diagnostic and returns ZEBRA_FAIL.  The sort window and the hit count are
 * reset before this validation, so a failed call leaves them empty.
 *
 * The whole RSET is then read; every change of sysno counts as one hit and
 * is passed to resultSetInsertSort().  On success *sort_status is set to
 * Z_SortResponse_success and ZEBRA_OK is returned.
 */
816 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
817                               ZebraSet sset, RSET rset,
818                               Z_SortKeySpecList *sort_sequence,
819                               int *sort_status)
820 {
821     int i;
822     int n = 0;
823     zint kno = 0;
824     zint psysno = 0;
825     struct it_key key;
826     struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
827     char *cmp_buf[ZSET_SORT_MAX_LEVEL];
828     char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
829     int num_criteria;
830     RSFD rfd;
831     TERMID termid;
832     TERMID *terms;
833     int numTerms = 0;
834     size_t sysno_mem_index = 0;
835
    /* with static rank enabled the sysno is taken from key.mem[1]
       instead of key.mem[0] */
836     if (zh->m_staticrank)
837         sysno_mem_index = 1;
838
839     assert(nmem); /* compiler shut up about unused param */
840     sset->sort_info->num_entries = 0;
841
    /* first call only obtains the term count n, second one fetches them */
842     rset_getterms(rset, 0, 0, &n);
843     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
844     rset_getterms(rset, terms, n, &numTerms);
845
846     sset->hits = 0;
847     num_criteria = sort_sequence->num_specs;
848     if (num_criteria > ZSET_SORT_MAX_LEVEL)
849         num_criteria = ZSET_SORT_MAX_LEVEL;
    /* translate each sort key spec into a sortKeyInfo */
850     for (i = 0; i < num_criteria; i++)
851     {
852         Z_SortKeySpec *sks = sort_sequence->specs[i];
853         Z_SortKey *sk;
854         ZEBRA_RES res;
855
856         sort_criteria[i].ord = -1;
857         sort_criteria[i].numerical = 0;
858
859         if (sks->which == Z_SortKeySpec_missingValueData)
860         {
861             zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
862             return ZEBRA_FAIL;
863         }
864         if (*sks->sortRelation == Z_SortKeySpec_ascending)
865             sort_criteria[i].relation = 'A';
866         else if (*sks->sortRelation == Z_SortKeySpec_descending)
867             sort_criteria[i].relation = 'D';
868         else
869         {
870             zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
871             return ZEBRA_FAIL;
872         }
        /* only generic sort elements are accepted */
873         if (sks->sortElement->which == Z_SortElement_databaseSpecific)
874         {
875             zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
876             return ZEBRA_FAIL;
877         }
878         else if (sks->sortElement->which != Z_SortElement_generic)
879         {
880             zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
881             return ZEBRA_FAIL;
882         }       
883         sk = sks->sortElement->u.generic;
        /* NOTE(review): there is no default case; for any other sk->which
           the ord stays -1 and the lookup after the switch decides */
884         switch (sk->which)
885         {
886         case Z_SortKey_sortField:
887             yaz_log(log_level_sort, "key %d is of type sortField",
888                     i+1);
889             sort_criteria[i].numerical = 0;
890             sort_criteria[i].ord = 
891                 zebraExplain_lookup_attr_str(zh->reg->zei,
892                                              zinfo_index_category_sort,
893                                              -1, sk->u.sortField);
894             if (sks->which != Z_SortKeySpec_null
895                 && sort_criteria[i].ord == -1)
896             {
897                 zebra_setError(zh,
898                                YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
899                 return ZEBRA_FAIL;
900             }
901             break;
902         case Z_SortKey_elementSpec:
903             yaz_log(log_level_sort, "key %d is of type elementSpec",
904                     i+1);
905             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
906             return ZEBRA_FAIL;
907         case Z_SortKey_sortAttributes:
908             yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
909             res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
910
911                                      &sort_criteria[i].ord,
912                                      &sort_criteria[i].numerical);
913             if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
914                 return ZEBRA_FAIL;
915             break;
916         }
        /* fetch the index type for the ord; a nonzero result is an error */
917         if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord,
918                                     &sort_criteria[i].index_type,
919                                     0, 0))
920         {
921             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
922             return ZEBRA_FAIL;
923         }
924     }
925     /* allocate space for each compare buf + one extra for tmp comparison */
926     for (i = 0; i<num_criteria; i++)
927     {
928         cmp_buf[i] = xmalloc(sset->sort_info->max_entries
929                              * SORT_IDX_ENTRYSIZE);
930         tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
931     }
    /* read every key; a change of sysno marks a new record (hit) */
932     rfd = rset_open (rset, RSETF_READ);
933     while (rset_read (rfd, &key, &termid))
934     {
935         zint this_sys = key.mem[sysno_mem_index];
936         if (log_level_searchhits)
937             key_logdump_txt(log_level_searchhits, &key, termid->name);
938         kno++;
939         if (this_sys != psysno)
940         {
941             (sset->hits)++;
942             psysno = this_sys;
943             resultSetInsertSort(zh, sset,
944                                 sort_criteria, num_criteria, psysno, cmp_buf,
945                                 tmp_cmp_buf);
946         }
947     }
948     rset_close (rfd);
949
950     for (i = 0; i<num_criteria; i++)
951     {
952         xfree(cmp_buf[i]);
953         xfree(tmp_cmp_buf[i]);
954     }
955
956     yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
957             kno, sset->hits);   
958     for (i = 0; i < numTerms; i++)
959         yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
960                  terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
961     *sort_status = Z_SortResponse_success;
962     return ZEBRA_OK;
963 }
964
965 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
966 {
967     ZebraSet s;
968
969     if ((s = resultSetGet (zh, resultSetId)))
970         return s->rset;
971     return NULL;
972 }
973
974 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
975                         RSET rset, NMEM nmem)
976 {
977     struct it_key key;
978     TERMID termid;
979     TERMID *terms;
980     zint kno = 0;
981     int numTerms = 0;
982     int n = 0;
983     int i;
984     ZebraRankClass rank_class;
985     struct zset_sort_info *sort_info;
986     const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
987     size_t sysno_mem_index = 0;
988
989     if (zh->m_staticrank)
990         sysno_mem_index = 1;
991
992     if (!log_level_set)
993         loglevels();
994     sort_info = zebraSet->sort_info;
995     sort_info->num_entries = 0;
996     zebraSet->hits = 0;
997     zebraSet->estimated_hit_count = 0;
998     rset_getterms(rset, 0, 0, &n);
999     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1000     rset_getterms(rset, terms, n, &numTerms);
1001
1002
1003     rank_class = zebraRankLookup(zh, rank_handler_name);
1004     if (!rank_class)
1005     {
1006         yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1007         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1008         return ZEBRA_FAIL;
1009     }
1010     else
1011     {
1012         RSFD rfd = rset_open(rset, RSETF_READ);
1013         struct rank_control *rc = rank_class->control;
1014         int score;
1015         zint count = 0;
1016         
1017         void *handle =
1018             (*rc->begin) (zh->reg, rank_class->class_handle, rset, nmem,
1019                           terms, numTerms);
1020         zint psysno = 0;  /* previous doc id / sys no */
1021         zint pstaticrank = 0; /* previous static rank */
1022         int stop_flag = 0;
1023         while (rset_read(rfd, &key, &termid))
1024         {
1025             zint this_sys = key.mem[sysno_mem_index];
1026
1027             zint seqno = key.mem[key.len-1];
1028             kno++;
1029             if (log_level_searchhits)
1030                 key_logdump_txt(log_level_searchhits, &key, termid->name);
1031             if (this_sys != psysno) 
1032             {   /* new record .. */
1033                 if (rfd->counted_items > rset->hits_limit)
1034                 {
1035                     zebraSet->estimated_hit_count = 1;
1036                     break;
1037                 }
1038                 if (psysno)
1039                 {   /* only if we did have a previous record */
1040                     score = (*rc->calc) (handle, psysno, pstaticrank,
1041                                          &stop_flag);
1042                     /* insert the hit. A=Ascending */
1043                     resultSetInsertRank (zh, sort_info, psysno, score, 'A');
1044                     count++;
1045                     if (stop_flag)
1046                         break;
1047                 }
1048                 psysno = this_sys;
1049                 if (zh->m_staticrank)
1050                     pstaticrank = key.mem[0];
1051             }
1052             (*rc->add) (handle, CAST_ZINT_TO_INT(seqno), termid);
1053         }
1054         /* no more items */
1055         if (psysno)
1056         {   /* we had - at least - one record */
1057             score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1058             /* insert the hit. A=Ascending */
1059             resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1060             count++;
1061         }
1062         (*rc->end) (zh->reg, handle);
1063         rset_close (rfd);
1064     }
1065     zebraSet->hits = rset->hits_count;
1066
1067     yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1068             ZINT_FORMAT " sysnos, rank",  kno, zebraSet->hits);
1069     for (i = 0; i < numTerms; i++)
1070     {
1071         yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1072                 ZINT_FORMAT,
1073                 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1074     }
1075     return ZEBRA_OK;
1076 }
1077
1078 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1079 {
1080     ZebraRankClass p = zh->reg->rank_classes;
1081     while (p && strcmp (p->control->name, name))
1082         p = p->next;
1083     if (p && !p->init_flag)
1084     {
1085         if (p->control->create)
1086             p->class_handle = (*p->control->create)(zh);
1087         p->init_flag = 1;
1088     }
1089     return p;
1090 }
1091
1092 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1093 {
1094     ZebraRankClass p = (ZebraRankClass) xmalloc (sizeof(*p));
1095     p->control = (struct rank_control *) xmalloc (sizeof(*p->control));
1096     memcpy (p->control, ctrl, sizeof(*p->control));
1097     p->control->name = xstrdup (ctrl->name);
1098     p->init_flag = 0;
1099     p->next = reg->rank_classes;
1100     reg->rank_classes = p;
1101 }
1102
1103 void zebraRankDestroy(struct zebra_register *reg)
1104 {
1105     ZebraRankClass p = reg->rank_classes;
1106     while (p)
1107     {
1108         ZebraRankClass p_next = p->next;
1109         if (p->init_flag && p->control->destroy)
1110             (*p->control->destroy)(reg, p->class_handle);
1111         xfree(p->control->name);
1112         xfree(p->control);
1113         xfree(p);
1114         p = p_next;
1115     }
1116     reg->rank_classes = NULL;
1117 }
1118
1119 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1120                                  zint *hits_array, int *approx_array)
1121 {
1122     int no = 0;
1123     int i;
1124     for (i = 0; i<rset->no_children; i++)
1125         no += trav_rset_for_termids(rset->children[i],
1126                                     (termid_array ? termid_array + no : 0),
1127                                     (hits_array ? hits_array + no : 0),
1128                                     (approx_array ? approx_array + no : 0));
1129     if (rset->term)
1130     {
1131         if (termid_array)
1132             termid_array[no] = rset->term;
1133         if (hits_array)
1134             hits_array[no] = rset->hits_count;
1135         if (approx_array)
1136             approx_array[no] = rset->hits_approx;
1137 #if 0
1138         yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1139                 " count=" ZINT_FORMAT,
1140                 rset, rset->term->name, rset->hits_limit, rset->hits_count);
1141 #endif
1142         no++;
1143     }
1144     return no;
1145 }
1146
1147 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1148                                    int *num_terms)
1149 {
1150     ZebraSet sset = resultSetGet(zh, setname);
1151     *num_terms = 0;
1152     if (sset)
1153     {
1154         *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1155         return ZEBRA_OK;
1156     }
1157     return ZEBRA_FAIL;
1158 }
1159
1160 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1161                                      int no, zint *count, int *approx,
1162                                      char *termbuf, size_t *termlen,
1163                                      const char **term_ref_id)
1164 {
1165     ZebraSet sset = resultSetGet(zh, setname);
1166     if (sset)
1167     {
1168         int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1169         if (no >= 0 && no < num_terms)
1170         {
1171             TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1172             zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1173             int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1174             
1175             trav_rset_for_termids(sset->rset, term_array,
1176                                   hits_array, approx_array);
1177
1178             if (count)
1179                 *count = hits_array[no];
1180             if (approx)
1181                 *approx = approx_array[no];
1182             if (termbuf)
1183             {
1184                 char *inbuf = term_array[no]->name;
1185                 size_t inleft = strlen(inbuf);
1186                 size_t outleft = *termlen - 1;
1187
1188                 if (zh->iconv_from_utf8 != 0)
1189                 {
1190                     char *outbuf = termbuf;
1191                     size_t ret;
1192                     
1193                     ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1194                                     &outbuf, &outleft);
1195                     if (ret == (size_t)(-1))
1196                         *termlen = 0;
1197                     else
1198                         *termlen = outbuf - termbuf;
1199                 }
1200                 else
1201                 {
1202                     if (inleft > outleft)
1203                         inleft = outleft;
1204                     *termlen = inleft;
1205                     memcpy(termbuf, inbuf, *termlen);
1206                 }
1207                 termbuf[*termlen] = '\0';
1208             }
1209             if (term_ref_id)
1210                 *term_ref_id = term_array[no]->ref_id;
1211
1212             xfree(term_array);
1213             xfree(hits_array);
1214             xfree(approx_array);
1215             return ZEBRA_OK;
1216         }
1217     }
1218     return ZEBRA_FAIL;
1219 }
1220
1221 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1222                                     zint sysno, zebra_snippets *snippets)
1223 {
1224     ZebraSet sset = resultSetGet(zh, setname);
1225     yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1226             setname, sysno);
1227     if (!sset)
1228         return ZEBRA_FAIL;
1229     else
1230     {
1231         struct rset_key_control *kc = zebra_key_control_create(zh);
1232         NMEM nmem = nmem_create();
1233         struct it_key key;
1234         RSET rsets[2], rset_comb;
1235         RSET rset_temp = rset_create_temp(nmem, kc, kc->scope, 
1236                                           res_get (zh->res, "setTmpDir"),0 );
1237         
1238         TERMID termid;
1239         RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1240         
1241         key.mem[0] = sysno;
1242         key.mem[1] = 0;
1243         key.mem[2] = 0;
1244         key.mem[3] = 0;
1245         key.len = 2;
1246         rset_write (rsfd, &key);
1247         rset_close (rsfd);
1248
1249         rsets[0] = rset_temp;
1250         rsets[1] = rset_dup(sset->rset);
1251         
1252         rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1253
1254         rsfd = rset_open(rset_comb, RSETF_READ);
1255
1256         while (rset_read(rsfd, &key, &termid))
1257         {
1258             if (termid)
1259             {
1260                 struct ord_list *ol;
1261                 for (ol = termid->ol; ol; ol = ol->next)
1262                 {
1263                     zebra_snippets_append(snippets, key.mem[key.len-1],
1264                                           ol->ord, termid->name);
1265                 }
1266             }
1267         }
1268         rset_close(rsfd);
1269         
1270         rset_delete(rset_comb);
1271         nmem_destroy(nmem);
1272         kc->dec(kc);
1273     }
1274     return ZEBRA_OK;
1275 }
1276
1277 /*
1278  * Local variables:
1279  * c-basic-offset: 4
1280  * indent-tabs-mode: nil
1281  * End:
1282  * vim: shiftwidth=4 tabstop=8 expandtab
1283  */
1284