Use yaz_iconv flushing.
[idzebra-moved-to-github.git] / index / zsets.c
1 /* $Id: zsets.c,v 1.120 2007-03-20 22:07:35 adam Exp $
2    Copyright (C) 1995-2007
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31
32 #include "index.h"
33 #include "rank.h"
34 #include <yaz/diagbib1.h>
35 #include <rset.h>
36
37 #define ZSET_SORT_MAX_LEVEL 10
38
/* One slot of a term-list result set, filled in by resultSetAddTerm(). */
struct zebra_set_term_entry {
    int reg_type;       /* register type as passed to resultSetAddTerm() */
    char *db;           /* database name (copy owned by the set's nmem) */
    char *index_name;   /* index name (copy owned by the set's nmem) */
    char *term;         /* term text (copy owned by the set's nmem) */
};
45
46 struct zebra_set {
47     char *name;
48     RSET rset;
49     NMEM nmem;
50     NMEM rset_nmem; /* for creating the rsets in */
51     zint hits;
52     int num_bases;
53     char **basenames;
54     Z_RPNQuery *rpn;
55     Z_SortKeySpecList *sortSpec;
56     struct zset_sort_info *sort_info;
57     struct zebra_set_term_entry *term_entries;
58     int term_entries_max;
59     struct zebra_set *next;
60     int locked;
61     int estimated_hit_count;
62
63     zint cache_position;  /* last position */
64     RSFD cache_rfd;       /* rfd (NULL if not existing) */
65     zint cache_psysno;    /* sysno for last position */
66     zint approx_limit;    /* limit before we do approx */
67 };
68
69 struct zset_sort_entry {
70     zint sysno;
71     int score;
72 };
73
/* Bounded table holding the best max_entries records of a result set. */
struct zset_sort_info {
    int max_entries;                      /* capacity of both arrays */
    int num_entries;                      /* slots of 'entries' in use */
    struct zset_sort_entry *all_entries;  /* backing storage */
    struct zset_sort_entry **entries;     /* pointers into all_entries, in order */
};
80
/* Module log levels; log_level_set becomes 1 once they are resolved. */
static int log_level_set=0;
static int log_level_sort=0;
static int log_level_searchhits=0;
static int log_level_searchterms=0;
static int log_level_resultsets=0;

/* Resolve the module log levels once; later calls do nothing. */
static void loglevels(void)
{
    if (!log_level_set)
    {
        log_level_sort = yaz_log_module_level("sorting");
        log_level_searchhits = yaz_log_module_level("searchhits");
        log_level_searchterms = yaz_log_module_level("searchterms");
        log_level_resultsets = yaz_log_module_level("resultsets");
        log_level_set = 1;
    }
}
97
98
99 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
100                                  Z_RPNQuery *rpn, ZebraSet sset)
101 {
102     RSET rset = 0;
103     oident *attrset;
104     Z_SortKeySpecList *sort_sequence;
105     int sort_status, i;
106     ZEBRA_RES res = ZEBRA_OK;
107
108     sort_sequence = (Z_SortKeySpecList *)
109         nmem_malloc(nmem, sizeof(*sort_sequence));
110     sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
111     sort_sequence->specs = (Z_SortKeySpec **)
112         nmem_malloc(nmem, sort_sequence->num_specs *
113                      sizeof(*sort_sequence->specs));
114     for (i = 0; i<sort_sequence->num_specs; i++)
115         sort_sequence->specs[i] = 0;
116     
117     attrset = oid_getentbyoid (rpn->attributeSetId);
118
119     rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
120
121     res = rpn_search_top(zh, rpn->RPNStructure, attrset->value,
122                          nmem, rset_nmem,
123                          sort_sequence,
124                          sset->num_bases, sset->basenames,
125                          &rset);
126     if (res != ZEBRA_OK)
127     {
128         sset->rset = 0;
129         return res;
130     }
131     for (i = 0; sort_sequence->specs[i]; i++)
132         ;
133     sort_sequence->num_specs = i;
134     rset->hits_limit = sset->approx_limit;
135     if (!i)
136     {
137         res = resultSetRank (zh, sset, rset, rset_nmem);
138     }
139     else
140     {
141         res = resultSetSortSingle (zh, nmem, sset, rset,
142                                    sort_sequence, &sort_status);
143     }
144     sset->rset = rset;
145     return res;
146 }
147
148
149 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
150                           int num_bases, char **basenames,
151                           const char *setname,
152                           zint *hits, int *estimated_hit_count)
153 {
154     ZebraSet zebraSet;
155     int i;
156     ZEBRA_RES res;
157
158     *hits = 0;
159     *estimated_hit_count = 0;
160
161     zebraSet = resultSetAdd(zh, setname, 1);
162     if (!zebraSet)
163         return ZEBRA_FAIL;
164     zebraSet->locked = 1;
165     zebraSet->rpn = 0;
166     zebraSet->nmem = m;
167     zebraSet->rset_nmem = nmem_create(); 
168
169     zebraSet->num_bases = num_bases;
170     zebraSet->basenames = 
171         nmem_malloc (zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
172     for (i = 0; i<num_bases; i++)
173         zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
174
175     res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
176                           rpn, zebraSet);
177     *hits = zebraSet->hits;
178     if (zebraSet->estimated_hit_count)
179         *estimated_hit_count = 1;
180
181     if (zebraSet->rset)
182         zebraSet->rpn = rpn;
183     zebraSet->locked = 0;
184     if (!zebraSet->rset)
185         return ZEBRA_FAIL;
186     return res;
187 }
188
189 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
190                       const char *db, const char *index_name, 
191                       const char *term)
192 {
193     assert(zh); /* compiler shut up */
194     if (!s->nmem)
195         s->nmem = nmem_create ();
196     if (!s->term_entries)
197     {
198         int i;
199         s->term_entries_max = 1000;
200         s->term_entries =
201             nmem_malloc (s->nmem, s->term_entries_max * 
202                          sizeof(*s->term_entries));
203         for (i = 0; i < s->term_entries_max; i++)
204             s->term_entries[i].term = 0;
205     }
206     if (s->hits < s->term_entries_max)
207     {
208         s->term_entries[s->hits].reg_type = reg_type;
209         s->term_entries[s->hits].db = nmem_strdup (s->nmem, db);
210         s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
211         s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
212     }
213     (s->hits)++;
214 }
215
216 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
217 {
218     ZebraSet s;
219     int i;
220
221     for (s = zh->sets; s; s = s->next)
222         if (!strcmp (s->name, name))
223             break;
224     
225     if (!log_level_set)
226         loglevels();
227     if (s)
228     {
229         yaz_log(log_level_resultsets, "updating result set %s", name);
230         if (!ov || s->locked)
231             return NULL;
232         if (s->rset)
233         {
234             if (s->cache_rfd)
235                 rset_close(s->cache_rfd);
236             rset_delete (s->rset);
237         }
238         if (s->rset_nmem)
239             nmem_destroy (s->rset_nmem);
240         if (s->nmem)
241             nmem_destroy (s->nmem);
242     }
243     else
244     {
245         const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
246
247         yaz_log(log_level_resultsets, "adding result set %s", name);
248         s = (ZebraSet) xmalloc (sizeof(*s));
249         s->next = zh->sets;
250         zh->sets = s;
251         s->name = (char *) xmalloc (strlen(name)+1);
252         strcpy (s->name, name);
253
254         s->sort_info = (struct zset_sort_info *)
255             xmalloc (sizeof(*s->sort_info));
256         s->sort_info->max_entries = atoi(sort_max_str);
257         if (s->sort_info->max_entries < 2)
258             s->sort_info->max_entries = 2;
259
260         s->sort_info->entries = (struct zset_sort_entry **)
261             xmalloc (sizeof(*s->sort_info->entries) *
262                      s->sort_info->max_entries);
263         s->sort_info->all_entries = (struct zset_sort_entry *)
264             xmalloc (sizeof(*s->sort_info->all_entries) *
265                      s->sort_info->max_entries);
266         for (i = 0; i < s->sort_info->max_entries; i++)
267             s->sort_info->entries[i] = s->sort_info->all_entries + i;
268     }
269     s->locked = 0;
270     s->term_entries = 0;
271     s->hits = 0;
272     s->rset = 0;
273     s->rset_nmem = 0;
274     s->nmem = 0;
275     s->rpn = 0;
276     s->sortSpec = 0;
277     s->cache_position = 0;
278     s->cache_rfd = 0;
279     s->approx_limit = zh->approx_limit;
280     s->estimated_hit_count = 0;
281     return s;
282 }
283
284 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
285 {
286     ZebraSet s;
287
288     for (s = zh->sets; s; s = s->next)
289         if (!strcmp (s->name, name))
290         {
291             if (!s->term_entries && !s->rset && s->rpn)
292             {
293                 NMEM nmem = nmem_create ();
294                 yaz_log(log_level_resultsets, "research %s", name);
295                 if (!s->rset_nmem)
296                     s->rset_nmem=nmem_create();
297                 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
298                 if (s->rset && s->sortSpec)
299                 {
300                     int sort_status;
301                     yaz_log(log_level_resultsets, "resort %s", name);
302                     resultSetSortSingle (zh, nmem, s, s->rset, s->sortSpec,
303                                          &sort_status);
304                 }
305                 nmem_destroy (nmem);
306             }
307             return s;
308         }
309     return NULL;
310 }
311
312 void resultSetInvalidate (ZebraHandle zh)
313 {
314     ZebraSet s = zh->sets;
315     
316     yaz_log(log_level_resultsets, "invalidating result sets");
317     for (; s; s = s->next)
318     {
319         if (s->rset)
320         {
321             if (s->cache_rfd)
322                 rset_close(s->cache_rfd);
323             rset_delete (s->rset);
324         }
325         s->rset = 0;
326         s->cache_rfd = 0;
327         s->cache_position = 0;
328         if (s->rset_nmem)
329             nmem_destroy(s->rset_nmem);
330         s->rset_nmem=0;
331     }
332 }
333
334 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
335 {
336     ZebraSet * ss = &zh->sets;
337     int i;
338     
339     if (statuses)
340         for (i = 0; i<num; i++)
341             statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
342     while (*ss)
343     {
344         int i = -1;
345         ZebraSet s = *ss;
346         if (num >= 0)
347         {
348             for (i = 0; i<num; i++)
349                 if (!strcmp (s->name, names[i]))
350                 {
351                     if (statuses)
352                         statuses[i] = Z_DeleteStatus_success;
353                     i = -1;
354                     break;
355                 }
356         }
357         if (i < 0)
358         {
359             *ss = s->next;
360             
361             xfree (s->sort_info->all_entries);
362             xfree (s->sort_info->entries);
363             xfree (s->sort_info);
364             
365             if (s->nmem)
366                 nmem_destroy (s->nmem);
367             if (s->rset)
368             {
369                 if (s->cache_rfd)
370                     rset_close(s->cache_rfd);
371                 rset_delete (s->rset);
372             }
373             if (s->rset_nmem)
374                 nmem_destroy(s->rset_nmem);
375             xfree (s->name);
376             xfree (s);
377         }
378         else
379             ss = &s->next;
380     }
381 }
382
383 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
384                                                  const char *name, 
385                                                  zint start, int num)
386 {
387     zint pos_small[10];
388     zint *pos = pos_small;
389     ZebraMetaRecord *mr;
390     int i;
391
392     if (num > 10000 || num <= 0)
393         return 0;
394
395     if (num > 10)
396         pos = xmalloc(sizeof(*pos) * num);
397     
398     for (i = 0; i<num; i++)
399         pos[i] = start+i;
400
401     mr = zebra_meta_records_create(zh, name, num, pos);
402     
403     if (num > 10)
404         xfree(pos);
405     return mr;
406 }
407
408 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name, 
409                                            int num, zint *positions)
410 {
411     ZebraSet sset;
412     ZebraMetaRecord *sr = 0;
413     RSET rset;
414     int i;
415     struct zset_sort_info *sort_info;
416     size_t sysno_mem_index = 0;
417
418     if (zh->m_staticrank)
419         sysno_mem_index = 1;
420
421     if (!log_level_set)
422         loglevels();
423     if (!(sset = resultSetGet (zh, name)))
424         return NULL;
425     if (!(rset = sset->rset))
426     {
427         if (!sset->term_entries)
428             return 0;
429         sr = (ZebraMetaRecord *) xmalloc (sizeof(*sr) * num);
430         for (i = 0; i<num; i++)
431         {
432             sr[i].sysno = 0;
433             sr[i].score = -1;
434             sr[i].term = 0;
435             sr[i].db = 0;
436
437             if (positions[i] <= sset->term_entries_max)
438             {
439                 sr[i].term = sset->term_entries[positions[i]-1].term;
440                 sr[i].db = sset->term_entries[positions[i]-1].db;
441             }
442         }
443     }
444     else
445     {
446         sr = (ZebraMetaRecord *) xmalloc (sizeof(*sr) * num);
447         for (i = 0; i<num; i++)
448         {
449             sr[i].sysno = 0;
450             sr[i].score = -1;
451             sr[i].term = 0;
452             sr[i].db = 0;
453         }
454         sort_info = sset->sort_info;
455         if (sort_info)
456         {
457             zint position;
458             
459             for (i = 0; i<num; i++)
460             {
461                 position = positions[i];
462                 if (position > 0 && position <= sort_info->num_entries)
463                 {
464                     yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
465                             " (sorted)", position);
466                     sr[i].sysno = sort_info->entries[position-1]->sysno;
467                     sr[i].score = sort_info->entries[position-1]->score;
468                 }
469             }
470         }
471         /* did we really get all entries using sort ? */
472         for (i = 0; i<num; i++)
473         {
474             if (!sr[i].sysno)
475                 break;
476         }
477         if (i < num) /* nope, get the rest, unsorted - sorry */
478         {
479             zint position = 0;
480             int num_i = 0;
481             zint psysno = 0;
482             RSFD rfd;
483             struct it_key key;
484             
485             if (sort_info)
486                 position = sort_info->num_entries;
487             while (num_i < num && positions[num_i] <= position)
488                 num_i++;
489             
490             if (sset->cache_rfd &&
491                 num_i < num && positions[num_i] > sset->cache_position)
492             {
493                 position = sset->cache_position;
494                 rfd = sset->cache_rfd;
495                 psysno = sset->cache_psysno;
496             }
497             else
498             {
499                 if (sset->cache_rfd)
500                     rset_close(sset->cache_rfd);
501                 rfd = rset_open (rset, RSETF_READ);
502             }
503             while (num_i < num && rset_read (rfd, &key, 0))
504             {
505                 zint this_sys = key.mem[sysno_mem_index];
506                 if (this_sys != psysno)
507                 {
508                     psysno = this_sys;
509                     if (sort_info)
510                     {
511                         /* determine we alreay have this in our set */
512                         for (i = sort_info->num_entries; --i >= 0; )
513                             if (psysno == sort_info->entries[i]->sysno)
514                                 break;
515                         if (i >= 0)
516                             continue;
517                     }
518                     position++;
519                     assert (num_i < num);
520                     if (position == positions[num_i])
521                     {
522                         sr[num_i].sysno = psysno;
523                         yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
524                         sr[num_i].score = -1;
525                         num_i++;
526                     }
527                 }
528             }
529             sset->cache_position = position;
530             sset->cache_psysno = psysno;
531             sset->cache_rfd = rfd;
532         }
533     }
534     return sr;
535 }
536
537 void zebra_meta_records_destroy (ZebraHandle zh, ZebraMetaRecord *records,
538                                  int num)
539 {
540     assert(zh); /* compiler shut up about unused arg */
541     xfree (records);
542 }
543
/* One resolved sort criterion as used by resultSetInsertSort(). */
struct sortKeyInfo {
    int relation;    /* 'A' for ascending, 'D' for descending */
    int ord;         /* sort index ordinal, -1 when not resolved */
    int numerical;   /* non-zero: compare keys as numbers */
    int index_type;  /* index type used to untranslate numerical keys */
};
550
551 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
552                          struct sortKeyInfo *criteria, int num_criteria,
553                          zint sysno,
554                          char *cmp_buf[], char *tmp_cmp_buf[])
555 {
556     struct zset_sort_entry *new_entry = NULL;
557     struct zset_sort_info *sort_info = sset->sort_info;
558     int i, j;
559
560     zebra_sort_sysno(zh->reg->sort_index, sysno);
561     for (i = 0; i<num_criteria; i++)
562     {
563         char *this_entry_buf = tmp_cmp_buf[i];
564         memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
565         if (criteria[i].ord != -1)
566         {
567             zebra_sort_type(zh->reg->sort_index, criteria[i].ord);
568             zebra_sort_read(zh->reg->sort_index, this_entry_buf);
569         }
570     }
571     i = sort_info->num_entries;
572     while (--i >= 0)
573     {
574         int rel = 0;
575         for (j = 0; j<num_criteria; j++)
576         {
577             char *this_entry_buf = tmp_cmp_buf[j];
578             char *other_entry_buf = 
579                 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
580             if (criteria[j].numerical)
581             {
582                 char this_entry_org[1024];
583                 char other_entry_org[1024];
584                 double diff;
585                 int index_type = criteria[j].index_type;
586                 zebra_term_untrans(zh, index_type, this_entry_org,
587                                    this_entry_buf);
588                 zebra_term_untrans(zh, index_type, other_entry_org,
589                                    other_entry_buf);
590                 diff = atof(this_entry_org) - atof(other_entry_org);
591                 
592                 if (diff > 0.0)
593                     rel = 1;
594                 else if (diff < 0.0)
595                     rel = -1;
596                 else
597                     rel = 0;
598             }
599             else
600             {
601                 rel = memcmp(this_entry_buf, other_entry_buf,
602                              SORT_IDX_ENTRYSIZE);
603             }
604             if (rel)
605                 break;
606         }       
607         if (!rel)
608             break;
609         if (criteria[j].relation == 'A')
610         {
611             if (rel > 0)
612                 break;
613         }
614         else if (criteria[j].relation == 'D')
615         {
616             if (rel < 0)
617                 break;
618         }
619     }
620     ++i;
621     j = sort_info->max_entries;
622     if (i == j)
623         return;
624
625     if (sort_info->num_entries == j)
626         --j;
627     else
628         j = (sort_info->num_entries)++;
629     new_entry = sort_info->entries[j];
630     while (j != i)
631     {
632         int k;
633         for (k = 0; k<num_criteria; k++)
634         {
635             char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
636             char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
637             memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
638         }
639         sort_info->entries[j] = sort_info->entries[j-1];
640         --j;
641     }
642     sort_info->entries[i] = new_entry;
643     assert (new_entry);
644     for (i = 0; i<num_criteria; i++)
645     {
646         char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
647         char *this_entry_buf = tmp_cmp_buf[i];
648         memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
649     }
650     new_entry->sysno = sysno;
651     new_entry->score = -1;
652 }
653
654 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
655                          zint sysno, int score, int relation)
656 {
657     struct zset_sort_entry *new_entry = NULL;
658     int i, j;
659     assert(zh); /* compiler shut up about unused arg */
660
661     i = sort_info->num_entries;
662     while (--i >= 0)
663     {
664         int rel = 0;
665
666         rel = score - sort_info->entries[i]->score;
667
668         if (relation == 'D')
669         {
670             if (rel >= 0)
671                 break;
672         }
673         else if (relation == 'A')
674         {
675             if (rel <= 0)
676                 break;
677         }
678     }
679     ++i;
680     j = sort_info->max_entries;
681     if (i == j)
682         return;
683
684     if (sort_info->num_entries == j)
685         --j;
686     else
687         j = (sort_info->num_entries)++;
688     
689     new_entry = sort_info->entries[j];
690     while (j != i)
691     {
692         sort_info->entries[j] = sort_info->entries[j-1];
693         --j;
694     }
695     sort_info->entries[i] = new_entry;
696     assert (new_entry);
697     new_entry->sysno = sysno;
698     new_entry->score = score;
699 }
700
701 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
702 {
703     Z_RPNQuery *dst = 0;
704     ODR encode = odr_createmem(ODR_ENCODE);
705     ODR decode = odr_createmem(ODR_DECODE);
706
707     if (z_RPNQuery(encode, &src, 0, 0))
708     {
709         int len;
710         char *buf = odr_getbuf(encode, &len, 0);
711
712         if (buf)
713         {
714             odr_setbuf(decode, buf, len, 0);
715             z_RPNQuery(decode, &dst, 0, 0);
716         }
717     }
718     nmem_transfer(nmem, decode->mem);
719     odr_destroy(encode);
720     odr_destroy(decode);
721     return dst;
722 }
723
724 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
725 {
726     Z_SortKeySpecList *dst = 0;
727     ODR encode = odr_createmem(ODR_ENCODE);
728     ODR decode = odr_createmem(ODR_DECODE);
729
730     if (z_SortKeySpecList(encode, &src, 0, 0))
731     {
732         int len;
733         char *buf = odr_getbuf(encode, &len, 0);
734
735         if (buf)
736         {
737             odr_setbuf(decode, buf, len, 0);
738             z_SortKeySpecList(decode, &dst, 0, 0);
739         }
740     }
741     nmem_transfer(nmem, decode->mem);
742     odr_destroy(encode);
743     odr_destroy(decode);
744     return dst;
745 }
746
747 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
748                         ZebraSet rset)
749 {
750     ZebraSet nset;
751     int i;
752
753     nset = resultSetAdd(zh, setname, 1);
754     if (!nset)
755         return 0;
756
757     nset->nmem = nmem_create();
758
759     nset->num_bases = rset->num_bases;
760     nset->basenames = 
761         nmem_malloc (nset->nmem, nset->num_bases * sizeof(*rset->basenames));
762     for (i = 0; i<rset->num_bases; i++)
763         nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
764
765     if (rset->rset)
766         nset->rset = rset_dup(rset->rset);
767     if (rset->rpn)
768         nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
769     return nset;
770 }
771
772 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
773                         int num_input_setnames, const char **input_setnames,
774                         const char *output_setname,
775                         Z_SortKeySpecList *sort_sequence, int *sort_status)
776 {
777     ZebraSet sset;
778     RSET rset;
779
780     if (num_input_setnames == 0)
781     {
782         zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
783         return ZEBRA_FAIL;
784     }
785     if (num_input_setnames > 1)
786     {
787         zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
788         return ZEBRA_FAIL;
789     }
790     if (!log_level_set)
791         loglevels();
792     yaz_log(log_level_sort, "result set sort input=%s output=%s",
793           *input_setnames, output_setname);
794     sset = resultSetGet (zh, input_setnames[0]);
795     if (!sset)
796     {
797         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
798                        input_setnames[0]);
799         return ZEBRA_FAIL;
800     }
801     if (!(rset = sset->rset))
802     {
803         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
804                        input_setnames[0]);
805         return ZEBRA_FAIL;
806     }
807     if (strcmp (output_setname, input_setnames[0]))
808         sset = resultSetClone(zh, output_setname, sset);
809     sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
810     return resultSetSortSingle (zh, nmem, sset, rset, sort_sequence,
811                                 sort_status);
812 }
813
814 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
815                               ZebraSet sset, RSET rset,
816                               Z_SortKeySpecList *sort_sequence,
817                               int *sort_status)
818 {
819     int i;
820     int n = 0;
821     zint kno = 0;
822     zint psysno = 0;
823     struct it_key key;
824     struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
825     char *cmp_buf[ZSET_SORT_MAX_LEVEL];
826     char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
827     int num_criteria;
828     RSFD rfd;
829     TERMID termid;
830     TERMID *terms;
831     int numTerms = 0;
832     size_t sysno_mem_index = 0;
833
834     if (zh->m_staticrank)
835         sysno_mem_index = 1;
836
837     assert(nmem); /* compiler shut up about unused param */
838     sset->sort_info->num_entries = 0;
839
840     rset_getterms(rset, 0, 0, &n);
841     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
842     rset_getterms(rset, terms, n, &numTerms);
843
844     sset->hits = 0;
845     num_criteria = sort_sequence->num_specs;
846     if (num_criteria > ZSET_SORT_MAX_LEVEL)
847         num_criteria = ZSET_SORT_MAX_LEVEL;
848     for (i = 0; i < num_criteria; i++)
849     {
850         Z_SortKeySpec *sks = sort_sequence->specs[i];
851         Z_SortKey *sk;
852         ZEBRA_RES res;
853
854         sort_criteria[i].ord = -1;
855         sort_criteria[i].numerical = 0;
856
857         if (sks->which == Z_SortKeySpec_missingValueData)
858         {
859             zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
860             return ZEBRA_FAIL;
861         }
862         if (*sks->sortRelation == Z_SortKeySpec_ascending)
863             sort_criteria[i].relation = 'A';
864         else if (*sks->sortRelation == Z_SortKeySpec_descending)
865             sort_criteria[i].relation = 'D';
866         else
867         {
868             zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
869             return ZEBRA_FAIL;
870         }
871         if (sks->sortElement->which == Z_SortElement_databaseSpecific)
872         {
873             zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
874             return ZEBRA_FAIL;
875         }
876         else if (sks->sortElement->which != Z_SortElement_generic)
877         {
878             zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
879             return ZEBRA_FAIL;
880         }       
881         sk = sks->sortElement->u.generic;
882         switch (sk->which)
883         {
884         case Z_SortKey_sortField:
885             yaz_log(log_level_sort, "key %d is of type sortField",
886                     i+1);
887             sort_criteria[i].numerical = 0;
888             sort_criteria[i].ord = 
889                 zebraExplain_lookup_attr_str(zh->reg->zei,
890                                              zinfo_index_category_sort,
891                                              -1, sk->u.sortField);
892             if (sks->which != Z_SortKeySpec_null
893                 && sort_criteria[i].ord == -1)
894             {
895                 zebra_setError(zh,
896                                YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
897                 return ZEBRA_FAIL;
898             }
899             break;
900         case Z_SortKey_elementSpec:
901             yaz_log(log_level_sort, "key %d is of type elementSpec",
902                     i+1);
903             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
904             return ZEBRA_FAIL;
905         case Z_SortKey_sortAttributes:
906             yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
907             res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
908
909                                      &sort_criteria[i].ord,
910                                      &sort_criteria[i].numerical);
911             if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
912                 return ZEBRA_FAIL;
913             break;
914         }
915         if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord,
916                                     &sort_criteria[i].index_type,
917                                     0, 0))
918         {
919             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
920             return ZEBRA_FAIL;
921         }
922     }
923     /* allocate space for each cmpare buf + one extra for tmp comparison */
924     for (i = 0; i<num_criteria; i++)
925     {
926         cmp_buf[i] = xmalloc(sset->sort_info->max_entries
927                              * SORT_IDX_ENTRYSIZE);
928         tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
929     }
930     rfd = rset_open (rset, RSETF_READ);
931     while (rset_read (rfd, &key, &termid))
932     {
933         zint this_sys = key.mem[sysno_mem_index];
934         if (log_level_searchhits)
935             key_logdump_txt(log_level_searchhits, &key, termid->name);
936         kno++;
937         if (this_sys != psysno)
938         {
939             if ((sset->hits & 255) == 0 && zh->break_handler_func)
940             {
941                 if (zh->break_handler_func(zh->break_handler_data))
942                 {
943                     rset_set_hits_limit(rset, 0);
944                     break;
945                 }
946             }
947             (sset->hits)++;
948             psysno = this_sys;
949             resultSetInsertSort(zh, sset,
950                                 sort_criteria, num_criteria, psysno, cmp_buf,
951                                 tmp_cmp_buf);
952         }
953     }
954     rset_close (rfd);
955
956     for (i = 0; i<num_criteria; i++)
957     {
958         xfree(cmp_buf[i]);
959         xfree(tmp_cmp_buf[i]);
960     }
961
962     yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
963             kno, sset->hits);   
964     for (i = 0; i < numTerms; i++)
965         yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
966                  terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
967     *sort_status = Z_SortResponse_success;
968     return ZEBRA_OK;
969 }
970
971 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
972 {
973     ZebraSet s;
974
975     if ((s = resultSetGet (zh, resultSetId)))
976         return s->rset;
977     return NULL;
978 }
979
980 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
981                         RSET rset, NMEM nmem)
982 {
983     struct it_key key;
984     TERMID termid;
985     TERMID *terms;
986     zint kno = 0;
987     int numTerms = 0;
988     int n = 0;
989     int i;
990     ZebraRankClass rank_class;
991     struct zset_sort_info *sort_info;
992     const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
993     size_t sysno_mem_index = 0;
994
995     if (zh->m_staticrank)
996         sysno_mem_index = 1;
997
998     if (!log_level_set)
999         loglevels();
1000     sort_info = zebraSet->sort_info;
1001     sort_info->num_entries = 0;
1002     zebraSet->hits = 0;
1003     zebraSet->estimated_hit_count = 0;
1004     rset_getterms(rset, 0, 0, &n);
1005     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1006     rset_getterms(rset, terms, n, &numTerms);
1007
1008     rank_class = zebraRankLookup(zh, rank_handler_name);
1009     if (!rank_class)
1010     {
1011         yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1012         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1013         return ZEBRA_FAIL;
1014     }
1015     else
1016     {
1017         RSFD rfd = rset_open(rset, RSETF_READ);
1018         struct rank_control *rc = rank_class->control;
1019         int score;
1020         zint count = 0;
1021         void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1022                                      nmem, terms, numTerms);
1023         zint psysno = 0;  /* previous doc id / sys no */
1024         zint pstaticrank = 0; /* previous static rank */
1025         int stop_flag = 0;
1026         while (rset_read(rfd, &key, &termid))
1027         {
1028             zint this_sys = key.mem[sysno_mem_index];
1029
1030             zint seqno = key.mem[key.len-1];
1031             kno++;
1032             if (log_level_searchhits)
1033                 key_logdump_txt(log_level_searchhits, &key, termid->name);
1034             if (this_sys != psysno) 
1035             {   /* new record .. */
1036                 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1037                 {
1038                     if (zh->break_handler_func(zh->break_handler_data))
1039                     {
1040                         yaz_log(YLOG_LOG, "Aborted search");
1041                         stop_flag = 1;
1042                     }
1043                 }
1044                 if (rfd->counted_items > rset->hits_limit)
1045                     stop_flag = 1;
1046                 if (psysno)
1047                 {   /* only if we did have a previous record */
1048                     score = (*rc->calc) (handle, psysno, pstaticrank,
1049                                          &stop_flag);
1050                     /* insert the hit. A=Ascending */
1051                     resultSetInsertRank (zh, sort_info, psysno, score, 'A');
1052                     count++;
1053                 }
1054                 if (stop_flag)
1055                 {
1056                     zebraSet->estimated_hit_count = 1;
1057                     rset_set_hits_limit(rset, 0);
1058                     break;
1059                 }
1060                 psysno = this_sys;
1061                 if (zh->m_staticrank)
1062                     pstaticrank = key.mem[0];
1063             }
1064             (*rc->add) (handle, CAST_ZINT_TO_INT(seqno), termid);
1065         }
1066         /* no more items */
1067         if (psysno)
1068         {   /* we had - at least - one record */
1069             score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1070             /* insert the hit. A=Ascending */
1071             resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1072             count++;
1073         }
1074         (*rc->end) (zh->reg, handle);
1075         rset_close (rfd);
1076     }
1077     zebraSet->hits = rset->hits_count;
1078
1079     yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1080             ZINT_FORMAT " sysnos, rank",  kno, zebraSet->hits);
1081     for (i = 0; i < numTerms; i++)
1082     {
1083         yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1084                 ZINT_FORMAT,
1085                 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1086     }
1087     return ZEBRA_OK;
1088 }
1089
1090 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1091 {
1092     ZebraRankClass p = zh->reg->rank_classes;
1093     while (p && strcmp (p->control->name, name))
1094         p = p->next;
1095     if (p && !p->init_flag)
1096     {
1097         if (p->control->create)
1098             p->class_handle = (*p->control->create)(zh);
1099         p->init_flag = 1;
1100     }
1101     return p;
1102 }
1103
1104 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1105 {
1106     ZebraRankClass p = (ZebraRankClass) xmalloc (sizeof(*p));
1107     p->control = (struct rank_control *) xmalloc (sizeof(*p->control));
1108     memcpy (p->control, ctrl, sizeof(*p->control));
1109     p->control->name = xstrdup (ctrl->name);
1110     p->init_flag = 0;
1111     p->next = reg->rank_classes;
1112     reg->rank_classes = p;
1113 }
1114
1115 void zebraRankDestroy(struct zebra_register *reg)
1116 {
1117     ZebraRankClass p = reg->rank_classes;
1118     while (p)
1119     {
1120         ZebraRankClass p_next = p->next;
1121         if (p->init_flag && p->control->destroy)
1122             (*p->control->destroy)(reg, p->class_handle);
1123         xfree(p->control->name);
1124         xfree(p->control);
1125         xfree(p);
1126         p = p_next;
1127     }
1128     reg->rank_classes = NULL;
1129 }
1130
1131 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1132                                  zint *hits_array, int *approx_array)
1133 {
1134     int no = 0;
1135     int i;
1136     for (i = 0; i<rset->no_children; i++)
1137         no += trav_rset_for_termids(rset->children[i],
1138                                     (termid_array ? termid_array + no : 0),
1139                                     (hits_array ? hits_array + no : 0),
1140                                     (approx_array ? approx_array + no : 0));
1141     if (rset->term)
1142     {
1143         if (termid_array)
1144             termid_array[no] = rset->term;
1145         if (hits_array)
1146             hits_array[no] = rset->hits_count;
1147         if (approx_array)
1148             approx_array[no] = rset->hits_approx;
1149 #if 0
1150         yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1151                 " count=" ZINT_FORMAT,
1152                 rset, rset->term->name, rset->hits_limit, rset->hits_count);
1153 #endif
1154         no++;
1155     }
1156     return no;
1157 }
1158
1159 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1160                                    int *num_terms)
1161 {
1162     ZebraSet sset = resultSetGet(zh, setname);
1163     *num_terms = 0;
1164     if (sset)
1165     {
1166         *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1167         return ZEBRA_OK;
1168     }
1169     return ZEBRA_FAIL;
1170 }
1171
1172 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1173                                      int no, zint *count, int *approx,
1174                                      char *termbuf, size_t *termlen,
1175                                      const char **term_ref_id)
1176 {
1177     ZebraSet sset = resultSetGet(zh, setname);
1178     if (sset)
1179     {
1180         int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1181         if (no >= 0 && no < num_terms)
1182         {
1183             TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1184             zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1185             int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1186             
1187             trav_rset_for_termids(sset->rset, term_array,
1188                                   hits_array, approx_array);
1189
1190             if (count)
1191                 *count = hits_array[no];
1192             if (approx)
1193                 *approx = approx_array[no];
1194             if (termbuf)
1195             {
1196                 char *inbuf = term_array[no]->name;
1197                 size_t inleft = strlen(inbuf);
1198                 size_t outleft = *termlen - 1;
1199
1200                 if (zh->iconv_from_utf8 != 0)
1201                 {
1202                     char *outbuf = termbuf;
1203                     size_t ret;
1204                     
1205                     ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1206                                     &outbuf, &outleft);
1207                     if (ret == (size_t)(-1))
1208                         *termlen = 0;
1209                     else
1210                     {
1211                         yaz_iconv(zh->iconv_from_utf8, 0, 0, 
1212                                   &outbuf, &outleft);
1213                         *termlen = outbuf - termbuf;
1214                     }
1215                 }
1216                 else
1217                 {
1218                     if (inleft > outleft)
1219                         inleft = outleft;
1220                     *termlen = inleft;
1221                     memcpy(termbuf, inbuf, *termlen);
1222                 }
1223                 termbuf[*termlen] = '\0';
1224             }
1225             if (term_ref_id)
1226                 *term_ref_id = term_array[no]->ref_id;
1227
1228             xfree(term_array);
1229             xfree(hits_array);
1230             xfree(approx_array);
1231             return ZEBRA_OK;
1232         }
1233     }
1234     return ZEBRA_FAIL;
1235 }
1236
1237 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1238                                     zint sysno, zebra_snippets *snippets)
1239 {
1240     ZebraSet sset = resultSetGet(zh, setname);
1241     yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1242             setname, sysno);
1243     if (!sset)
1244         return ZEBRA_FAIL;
1245     else
1246     {
1247         struct rset_key_control *kc = zebra_key_control_create(zh);
1248         NMEM nmem = nmem_create();
1249         struct it_key key;
1250         RSET rsets[2], rset_comb;
1251         RSET rset_temp = rset_create_temp(nmem, kc, kc->scope, 
1252                                           res_get (zh->res, "setTmpDir"),0 );
1253         
1254         TERMID termid;
1255         RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1256         
1257         key.mem[0] = sysno;
1258         key.mem[1] = 0;
1259         key.mem[2] = 0;
1260         key.mem[3] = 0;
1261         key.len = 2;
1262         rset_write (rsfd, &key);
1263         rset_close (rsfd);
1264
1265         rsets[0] = rset_temp;
1266         rsets[1] = rset_dup(sset->rset);
1267         
1268         rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1269
1270         rsfd = rset_open(rset_comb, RSETF_READ);
1271
1272         while (rset_read(rsfd, &key, &termid))
1273         {
1274             if (termid)
1275             {
1276                 struct ord_list *ol;
1277                 for (ol = termid->ol; ol; ol = ol->next)
1278                 {
1279                     zebra_snippets_append(snippets, key.mem[key.len-1],
1280                                           ol->ord, termid->name);
1281                 }
1282             }
1283         }
1284         rset_close(rsfd);
1285         
1286         rset_delete(rset_comb);
1287         nmem_destroy(nmem);
1288         kc->dec(kc);
1289     }
1290     return ZEBRA_OK;
1291 }
1292
1293 /*
1294  * Local variables:
1295  * c-basic-offset: 4
1296  * indent-tabs-mode: nil
1297  * End:
1298  * vim: shiftwidth=4 tabstop=8 expandtab
1299  */
1300