a20056b77ddad7176796a79469f435e1d6f99a70
[idzebra-moved-to-github.git] / index / zsets.c
1 /* $Id: zsets.c,v 1.114 2006-12-18 23:40:08 adam Exp $
2    Copyright (C) 1995-2006
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31
32 #include "index.h"
33 #include "rank.h"
34 #include <yaz/diagbib1.h>
35 #include <rset.h>
36
/* Maximum number of sort keys honoured by one sort; resultSetSortSingle()
   sizes its per-key arrays with it and clamps the requested key count. */
#define ZSET_SORT_MAX_LEVEL 10
38
/* One term stored by resultSetAddTerm() in a term-based result set. */
struct zebra_set_term_entry {
    int reg_type;       /* register type as handed to resultSetAddTerm() */
    char *db;           /* database name, duplicated into the set's nmem */
    char *index_name;   /* index name, duplicated into the set's nmem */
    char *term;         /* term text, duplicated into the set's nmem;
                           0 marks a slot that has not been filled */
};
45
46 struct zebra_set {
47     char *name;
48     RSET rset;
49     NMEM nmem;
50     NMEM rset_nmem; /* for creating the rsets in */
51     zint hits;
52     int num_bases;
53     char **basenames;
54     Z_RPNQuery *rpn;
55     Z_SortKeySpecList *sortSpec;
56     struct zset_sort_info *sort_info;
57     struct zebra_set_term_entry *term_entries;
58     int term_entries_max;
59     struct zebra_set *next;
60     int locked;
61
62     zint cache_position;  /* last position */
63     RSFD cache_rfd;       /* rfd (NULL if not existing) */
64     zint cache_psysno;    /* sysno for last position */
65     zint approx_limit;    /* limit before we do approx */
66 };
67
68 struct zset_sort_entry {
69     zint sysno;
70     int score;
71 };
72
/* Bounded, ordered list of the best entries of a result set. */
struct zset_sort_info {
    int max_entries;    /* capacity of both arrays below */
    int num_entries;    /* number of valid leading slots in entries[] */
    struct zset_sort_entry *all_entries;  /* backing storage */
    struct zset_sort_entry **entries;     /* ordered view: pointers into
                                             all_entries, permuted on insert */
};
79
/* YAZ log masks for this module; they stay 0 until loglevels() has run. */
static int log_level_set = 0;          /* non-zero once the masks are resolved */
static int log_level_sort = 0;
static int log_level_searchhits = 0;
static int log_level_searchterms = 0;
static int log_level_resultsets = 0;

/* Resolve the module log masks once; later calls do nothing. */
static void loglevels(void)
{
    if (!log_level_set)
    {
        log_level_sort = yaz_log_module_level("sorting");
        log_level_searchhits = yaz_log_module_level("searchhits");
        log_level_searchterms = yaz_log_module_level("searchterms");
        log_level_resultsets = yaz_log_module_level("resultsets");
        log_level_set = 1;
    }
}
96
97
98 ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
99                           Z_RPNQuery *rpn, ZebraSet sset)
100 {
101     RSET rset = 0;
102     oident *attrset;
103     Z_SortKeySpecList *sort_sequence;
104     int sort_status, i;
105     ZEBRA_RES res = ZEBRA_OK;
106
107     zh->hits = 0;
108
109     sort_sequence = (Z_SortKeySpecList *)
110         nmem_malloc(nmem, sizeof(*sort_sequence));
111     sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
112     sort_sequence->specs = (Z_SortKeySpec **)
113         nmem_malloc(nmem, sort_sequence->num_specs *
114                      sizeof(*sort_sequence->specs));
115     for (i = 0; i<sort_sequence->num_specs; i++)
116         sort_sequence->specs[i] = 0;
117     
118     attrset = oid_getentbyoid (rpn->attributeSetId);
119
120     rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
121
122     res = rpn_search_top(zh, rpn->RPNStructure, attrset->value,
123                          nmem, rset_nmem,
124                          sort_sequence,
125                          sset->num_bases, sset->basenames,
126                          &rset);
127     if (res != ZEBRA_OK)
128     {
129         sset->rset = 0;
130         return res;
131     }
132     for (i = 0; sort_sequence->specs[i]; i++)
133         ;
134     sort_sequence->num_specs = i;
135     rset->hits_limit = sset->approx_limit;
136     if (!i)
137     {
138         res = resultSetRank (zh, sset, rset, rset_nmem);
139     }
140     else
141     {
142         res = resultSetSortSingle (zh, nmem, sset, rset,
143                                    sort_sequence, &sort_status);
144     }
145     sset->rset = rset;
146     return res;
147 }
148
149
150 ZEBRA_RES resultSetAddRPN (ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
151                            int num_bases, char **basenames,
152                            const char *setname)
153 {
154     ZebraSet zebraSet;
155     int i;
156     ZEBRA_RES res;
157
158     zh->hits = 0;
159
160     zebraSet = resultSetAdd(zh, setname, 1);
161     if (!zebraSet)
162         return ZEBRA_FAIL;
163     zebraSet->locked = 1;
164     zebraSet->rpn = 0;
165     zebraSet->nmem = m;
166     zebraSet->rset_nmem = nmem_create(); 
167
168     zebraSet->num_bases = num_bases;
169     zebraSet->basenames = 
170         nmem_malloc (zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
171     for (i = 0; i<num_bases; i++)
172         zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
173
174     res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
175                           rpn, zebraSet);
176     zh->hits = zebraSet->hits;
177     if (zebraSet->rset)
178         zebraSet->rpn = rpn;
179     zebraSet->locked = 0;
180     if (!zebraSet->rset)
181         return ZEBRA_FAIL;
182     return res;
183 }
184
185 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
186                       const char *db, const char *index_name, 
187                       const char *term)
188 {
189     assert(zh); /* compiler shut up */
190     if (!s->nmem)
191         s->nmem = nmem_create ();
192     if (!s->term_entries)
193     {
194         int i;
195         s->term_entries_max = 1000;
196         s->term_entries =
197             nmem_malloc (s->nmem, s->term_entries_max * 
198                          sizeof(*s->term_entries));
199         for (i = 0; i < s->term_entries_max; i++)
200             s->term_entries[i].term = 0;
201     }
202     if (s->hits < s->term_entries_max)
203     {
204         s->term_entries[s->hits].reg_type = reg_type;
205         s->term_entries[s->hits].db = nmem_strdup (s->nmem, db);
206         s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
207         s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
208     }
209     (s->hits)++;
210 }
211
212 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
213 {
214     ZebraSet s;
215     int i;
216
217     for (s = zh->sets; s; s = s->next)
218         if (!strcmp (s->name, name))
219             break;
220     
221     if (!log_level_set)
222         loglevels();
223     if (s)
224     {
225         yaz_log(log_level_resultsets, "updating result set %s", name);
226         if (!ov || s->locked)
227             return NULL;
228         if (s->rset)
229         {
230             if (s->cache_rfd)
231                 rset_close(s->cache_rfd);
232             rset_delete (s->rset);
233         }
234         if (s->rset_nmem)
235             nmem_destroy (s->rset_nmem);
236         if (s->nmem)
237             nmem_destroy (s->nmem);
238     }
239     else
240     {
241         const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
242
243         yaz_log(log_level_resultsets, "adding result set %s", name);
244         s = (ZebraSet) xmalloc (sizeof(*s));
245         s->next = zh->sets;
246         zh->sets = s;
247         s->name = (char *) xmalloc (strlen(name)+1);
248         strcpy (s->name, name);
249
250         s->sort_info = (struct zset_sort_info *)
251             xmalloc (sizeof(*s->sort_info));
252         s->sort_info->max_entries = atoi(sort_max_str);
253         if (s->sort_info->max_entries < 2)
254             s->sort_info->max_entries = 2;
255
256         s->sort_info->entries = (struct zset_sort_entry **)
257             xmalloc (sizeof(*s->sort_info->entries) *
258                      s->sort_info->max_entries);
259         s->sort_info->all_entries = (struct zset_sort_entry *)
260             xmalloc (sizeof(*s->sort_info->all_entries) *
261                      s->sort_info->max_entries);
262         for (i = 0; i < s->sort_info->max_entries; i++)
263             s->sort_info->entries[i] = s->sort_info->all_entries + i;
264     }
265     s->locked = 0;
266     s->term_entries = 0;
267     s->hits = 0;
268     s->rset = 0;
269     s->rset_nmem = 0;
270     s->nmem = 0;
271     s->rpn = 0;
272     s->sortSpec = 0;
273     s->cache_position = 0;
274     s->cache_rfd = 0;
275     s->approx_limit = zh->approx_limit;
276     return s;
277 }
278
279 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
280 {
281     ZebraSet s;
282
283     for (s = zh->sets; s; s = s->next)
284         if (!strcmp (s->name, name))
285         {
286             if (!s->term_entries && !s->rset && s->rpn)
287             {
288                 NMEM nmem = nmem_create ();
289                 yaz_log(log_level_resultsets, "research %s", name);
290                 if (!s->rset_nmem)
291                     s->rset_nmem=nmem_create();
292                 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
293                 if (s->rset && s->sortSpec)
294                 {
295                     int sort_status;
296                     yaz_log(log_level_resultsets, "resort %s", name);
297                     resultSetSortSingle (zh, nmem, s, s->rset, s->sortSpec,
298                                          &sort_status);
299                 }
300                 nmem_destroy (nmem);
301             }
302             return s;
303         }
304     return NULL;
305 }
306
307 void resultSetInvalidate (ZebraHandle zh)
308 {
309     ZebraSet s = zh->sets;
310     
311     yaz_log(log_level_resultsets, "invalidating result sets");
312     for (; s; s = s->next)
313     {
314         if (s->rset)
315         {
316             if (s->cache_rfd)
317                 rset_close(s->cache_rfd);
318             rset_delete (s->rset);
319         }
320         s->rset = 0;
321         s->cache_rfd = 0;
322         s->cache_position = 0;
323         if (s->rset_nmem)
324             nmem_destroy(s->rset_nmem);
325         s->rset_nmem=0;
326     }
327 }
328
329 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
330 {
331     ZebraSet * ss = &zh->sets;
332     int i;
333     
334     if (statuses)
335         for (i = 0; i<num; i++)
336             statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
337     while (*ss)
338     {
339         int i = -1;
340         ZebraSet s = *ss;
341         if (num >= 0)
342         {
343             for (i = 0; i<num; i++)
344                 if (!strcmp (s->name, names[i]))
345                 {
346                     if (statuses)
347                         statuses[i] = Z_DeleteStatus_success;
348                     i = -1;
349                     break;
350                 }
351         }
352         if (i < 0)
353         {
354             *ss = s->next;
355             
356             xfree (s->sort_info->all_entries);
357             xfree (s->sort_info->entries);
358             xfree (s->sort_info);
359             
360             if (s->nmem)
361                 nmem_destroy (s->nmem);
362             if (s->rset)
363             {
364                 if (s->cache_rfd)
365                     rset_close(s->cache_rfd);
366                 rset_delete (s->rset);
367             }
368             if (s->rset_nmem)
369                 nmem_destroy(s->rset_nmem);
370             xfree (s->name);
371             xfree (s);
372         }
373         else
374             ss = &s->next;
375     }
376 }
377
378 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
379                                                  const char *name, 
380                                                  zint start, int num)
381 {
382     zint pos_small[10];
383     zint *pos = pos_small;
384     ZebraMetaRecord *mr;
385     int i;
386
387     if (num > 10000 || num <= 0)
388         return 0;
389
390     if (num > 10)
391         pos = xmalloc(sizeof(*pos) * num);
392     
393     for (i = 0; i<num; i++)
394         pos[i] = start+i;
395
396     mr = zebra_meta_records_create(zh, name, num, pos);
397     
398     if (num > 10)
399         xfree(pos);
400     return mr;
401 }
402
403 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name, 
404                                            int num, zint *positions)
405 {
406     ZebraSet sset;
407     ZebraMetaRecord *sr = 0;
408     RSET rset;
409     int i;
410     struct zset_sort_info *sort_info;
411     size_t sysno_mem_index = 0;
412
413     if (zh->m_staticrank)
414         sysno_mem_index = 1;
415
416     if (!log_level_set)
417         loglevels();
418     if (!(sset = resultSetGet (zh, name)))
419         return NULL;
420     if (!(rset = sset->rset))
421     {
422         if (!sset->term_entries)
423             return 0;
424         sr = (ZebraMetaRecord *) xmalloc (sizeof(*sr) * num);
425         for (i = 0; i<num; i++)
426         {
427             sr[i].sysno = 0;
428             sr[i].score = -1;
429             sr[i].term = 0;
430             sr[i].db = 0;
431
432             if (positions[i] <= sset->term_entries_max)
433             {
434                 sr[i].term = sset->term_entries[positions[i]-1].term;
435                 sr[i].db = sset->term_entries[positions[i]-1].db;
436             }
437         }
438     }
439     else
440     {
441         sr = (ZebraMetaRecord *) xmalloc (sizeof(*sr) * num);
442         for (i = 0; i<num; i++)
443         {
444             sr[i].sysno = 0;
445             sr[i].score = -1;
446             sr[i].term = 0;
447             sr[i].db = 0;
448         }
449         sort_info = sset->sort_info;
450         if (sort_info)
451         {
452             zint position;
453             
454             for (i = 0; i<num; i++)
455             {
456                 position = positions[i];
457                 if (position > 0 && position <= sort_info->num_entries)
458                 {
459                     yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
460                             " (sorted)", position);
461                     sr[i].sysno = sort_info->entries[position-1]->sysno;
462                     sr[i].score = sort_info->entries[position-1]->score;
463                 }
464             }
465         }
466         /* did we really get all entries using sort ? */
467         for (i = 0; i<num; i++)
468         {
469             if (!sr[i].sysno)
470                 break;
471         }
472         if (i < num) /* nope, get the rest, unsorted - sorry */
473         {
474             zint position = 0;
475             int num_i = 0;
476             zint psysno = 0;
477             RSFD rfd;
478             struct it_key key;
479             
480             if (sort_info)
481                 position = sort_info->num_entries;
482             while (num_i < num && positions[num_i] <= position)
483                 num_i++;
484             
485             if (sset->cache_rfd &&
486                 num_i < num && positions[num_i] > sset->cache_position)
487             {
488                 position = sset->cache_position;
489                 rfd = sset->cache_rfd;
490                 psysno = sset->cache_psysno;
491             }
492             else
493             {
494                 if (sset->cache_rfd)
495                     rset_close(sset->cache_rfd);
496                 rfd = rset_open (rset, RSETF_READ);
497             }
498             while (num_i < num && rset_read (rfd, &key, 0))
499             {
500                 zint this_sys = key.mem[sysno_mem_index];
501                 if (this_sys != psysno)
502                 {
503                     psysno = this_sys;
504                     if (sort_info)
505                     {
506                         /* determine we alreay have this in our set */
507                         for (i = sort_info->num_entries; --i >= 0; )
508                             if (psysno == sort_info->entries[i]->sysno)
509                                 break;
510                         if (i >= 0)
511                             continue;
512                     }
513                     position++;
514                     assert (num_i < num);
515                     if (position == positions[num_i])
516                     {
517                         sr[num_i].sysno = psysno;
518                         yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
519                         sr[num_i].score = -1;
520                         num_i++;
521                     }
522                 }
523             }
524             sset->cache_position = position;
525             sset->cache_psysno = psysno;
526             sset->cache_rfd = rfd;
527         }
528     }
529     return sr;
530 }
531
532 void zebra_meta_records_destroy (ZebraHandle zh, ZebraMetaRecord *records,
533                                  int num)
534 {
535     assert(zh); /* compiler shut up about unused arg */
536     xfree (records);
537 }
538
/* Resolved form of one sort key, as used by resultSetInsertSort(). */
struct sortKeyInfo {
    int relation;    /* 'A' for ascending, 'D' for descending */
    int ord;         /* ordinal of the sort index; -1 when there is none */
    int numerical;   /* non-zero: compare the keys as numbers (atof) */
    int index_type;  /* index type handed to zebra_term_untrans() */
};
545
546 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
547                          struct sortKeyInfo *criteria, int num_criteria,
548                          zint sysno,
549                          char *cmp_buf[], char *tmp_cmp_buf[])
550 {
551     struct zset_sort_entry *new_entry = NULL;
552     struct zset_sort_info *sort_info = sset->sort_info;
553     int i, j;
554
555     zebra_sort_sysno(zh->reg->sort_index, sysno);
556     for (i = 0; i<num_criteria; i++)
557     {
558         char *this_entry_buf = tmp_cmp_buf[i];
559         memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
560         if (criteria[i].ord != -1)
561         {
562             zebra_sort_type(zh->reg->sort_index, criteria[i].ord);
563             zebra_sort_read(zh->reg->sort_index, this_entry_buf);
564         }
565     }
566     i = sort_info->num_entries;
567     while (--i >= 0)
568     {
569         int rel = 0;
570         for (j = 0; j<num_criteria; j++)
571         {
572             char *this_entry_buf = tmp_cmp_buf[j];
573             char *other_entry_buf = 
574                 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
575             if (criteria[j].numerical)
576             {
577                 char this_entry_org[1024];
578                 char other_entry_org[1024];
579                 double diff;
580                 int index_type = criteria[j].index_type;
581                 zebra_term_untrans(zh, index_type, this_entry_org,
582                                    this_entry_buf);
583                 zebra_term_untrans(zh, index_type, other_entry_org,
584                                    other_entry_buf);
585                 diff = atof(this_entry_org) - atof(other_entry_org);
586                 
587                 if (diff > 0.0)
588                     rel = 1;
589                 else if (diff < 0.0)
590                     rel = -1;
591                 else
592                     rel = 0;
593             }
594             else
595             {
596                 rel = memcmp(this_entry_buf, other_entry_buf,
597                              SORT_IDX_ENTRYSIZE);
598             }
599             if (rel)
600                 break;
601         }       
602         if (!rel)
603             break;
604         if (criteria[j].relation == 'A')
605         {
606             if (rel > 0)
607                 break;
608         }
609         else if (criteria[j].relation == 'D')
610         {
611             if (rel < 0)
612                 break;
613         }
614     }
615     ++i;
616     j = sort_info->max_entries;
617     if (i == j)
618         return;
619
620     if (sort_info->num_entries == j)
621         --j;
622     else
623         j = (sort_info->num_entries)++;
624     new_entry = sort_info->entries[j];
625     while (j != i)
626     {
627         int k;
628         for (k = 0; k<num_criteria; k++)
629         {
630             char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
631             char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
632             memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
633         }
634         sort_info->entries[j] = sort_info->entries[j-1];
635         --j;
636     }
637     sort_info->entries[i] = new_entry;
638     assert (new_entry);
639     for (i = 0; i<num_criteria; i++)
640     {
641         char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
642         char *this_entry_buf = tmp_cmp_buf[i];
643         memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
644     }
645     new_entry->sysno = sysno;
646     new_entry->score = -1;
647 }
648
649 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
650                          zint sysno, int score, int relation)
651 {
652     struct zset_sort_entry *new_entry = NULL;
653     int i, j;
654     assert(zh); /* compiler shut up about unused arg */
655
656     i = sort_info->num_entries;
657     while (--i >= 0)
658     {
659         int rel = 0;
660
661         rel = score - sort_info->entries[i]->score;
662
663         if (relation == 'D')
664         {
665             if (rel >= 0)
666                 break;
667         }
668         else if (relation == 'A')
669         {
670             if (rel <= 0)
671                 break;
672         }
673     }
674     ++i;
675     j = sort_info->max_entries;
676     if (i == j)
677         return;
678
679     if (sort_info->num_entries == j)
680         --j;
681     else
682         j = (sort_info->num_entries)++;
683     
684     new_entry = sort_info->entries[j];
685     while (j != i)
686     {
687         sort_info->entries[j] = sort_info->entries[j-1];
688         --j;
689     }
690     sort_info->entries[i] = new_entry;
691     assert (new_entry);
692     new_entry->sysno = sysno;
693     new_entry->score = score;
694 }
695
696 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
697 {
698     Z_RPNQuery *dst = 0;
699     ODR encode = odr_createmem(ODR_ENCODE);
700     ODR decode = odr_createmem(ODR_DECODE);
701
702     if (z_RPNQuery(encode, &src, 0, 0))
703     {
704         int len;
705         char *buf = odr_getbuf(encode, &len, 0);
706
707         if (buf)
708         {
709             odr_setbuf(decode, buf, len, 0);
710             z_RPNQuery(decode, &dst, 0, 0);
711         }
712     }
713     nmem_transfer(nmem, decode->mem);
714     odr_destroy(encode);
715     odr_destroy(decode);
716     return dst;
717 }
718
719 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
720 {
721     Z_SortKeySpecList *dst = 0;
722     ODR encode = odr_createmem(ODR_ENCODE);
723     ODR decode = odr_createmem(ODR_DECODE);
724
725     if (z_SortKeySpecList(encode, &src, 0, 0))
726     {
727         int len;
728         char *buf = odr_getbuf(encode, &len, 0);
729
730         if (buf)
731         {
732             odr_setbuf(decode, buf, len, 0);
733             z_SortKeySpecList(decode, &dst, 0, 0);
734         }
735     }
736     nmem_transfer(nmem, decode->mem);
737     odr_destroy(encode);
738     odr_destroy(decode);
739     return dst;
740 }
741
742 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
743                         ZebraSet rset)
744 {
745     ZebraSet nset;
746     int i;
747
748     nset = resultSetAdd(zh, setname, 1);
749     if (!nset)
750         return 0;
751
752     nset->nmem = nmem_create();
753
754     nset->num_bases = rset->num_bases;
755     nset->basenames = 
756         nmem_malloc (nset->nmem, nset->num_bases * sizeof(*rset->basenames));
757     for (i = 0; i<rset->num_bases; i++)
758         nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
759
760     if (rset->rset)
761         nset->rset = rset_dup(rset->rset);
762     if (rset->rpn)
763         nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
764     return nset;
765 }
766
767 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
768                         int num_input_setnames, const char **input_setnames,
769                         const char *output_setname,
770                         Z_SortKeySpecList *sort_sequence, int *sort_status)
771 {
772     ZebraSet sset;
773     RSET rset;
774
775     if (num_input_setnames == 0)
776     {
777         zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
778         return ZEBRA_FAIL;
779     }
780     if (num_input_setnames > 1)
781     {
782         zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
783         return ZEBRA_FAIL;
784     }
785     if (!log_level_set)
786         loglevels();
787     yaz_log(log_level_sort, "result set sort input=%s output=%s",
788           *input_setnames, output_setname);
789     sset = resultSetGet (zh, input_setnames[0]);
790     if (!sset)
791     {
792         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
793                        input_setnames[0]);
794         return ZEBRA_FAIL;
795     }
796     if (!(rset = sset->rset))
797     {
798         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
799                        input_setnames[0]);
800         return ZEBRA_FAIL;
801     }
802     if (strcmp (output_setname, input_setnames[0]))
803         sset = resultSetClone(zh, output_setname, sset);
804     sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
805     return resultSetSortSingle (zh, nmem, sset, rset, sort_sequence,
806                                 sort_status);
807 }
808
809 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
810                               ZebraSet sset, RSET rset,
811                               Z_SortKeySpecList *sort_sequence,
812                               int *sort_status)
813 {
814     int i;
815     int n = 0;
816     zint kno = 0;
817     zint psysno = 0;
818     struct it_key key;
819     struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
820     char *cmp_buf[ZSET_SORT_MAX_LEVEL];
821     char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
822     int num_criteria;
823     RSFD rfd;
824     TERMID termid;
825     TERMID *terms;
826     int numTerms = 0;
827     size_t sysno_mem_index = 0;
828
829     if (zh->m_staticrank)
830         sysno_mem_index = 1;
831
832     assert(nmem); /* compiler shut up about unused param */
833     sset->sort_info->num_entries = 0;
834
835     rset_getterms(rset, 0, 0, &n);
836     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
837     rset_getterms(rset, terms, n, &numTerms);
838
839     sset->hits = 0;
840     num_criteria = sort_sequence->num_specs;
841     if (num_criteria > ZSET_SORT_MAX_LEVEL)
842         num_criteria = ZSET_SORT_MAX_LEVEL;
843     for (i = 0; i < num_criteria; i++)
844     {
845         Z_SortKeySpec *sks = sort_sequence->specs[i];
846         Z_SortKey *sk;
847         ZEBRA_RES res;
848
849         sort_criteria[i].ord = -1;
850         sort_criteria[i].numerical = 0;
851
852         if (sks->which == Z_SortKeySpec_missingValueData)
853         {
854             zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
855             return ZEBRA_FAIL;
856         }
857         if (*sks->sortRelation == Z_SortKeySpec_ascending)
858             sort_criteria[i].relation = 'A';
859         else if (*sks->sortRelation == Z_SortKeySpec_descending)
860             sort_criteria[i].relation = 'D';
861         else
862         {
863             zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
864             return ZEBRA_FAIL;
865         }
866         if (sks->sortElement->which == Z_SortElement_databaseSpecific)
867         {
868             zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
869             return ZEBRA_FAIL;
870         }
871         else if (sks->sortElement->which != Z_SortElement_generic)
872         {
873             zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
874             return ZEBRA_FAIL;
875         }       
876         sk = sks->sortElement->u.generic;
877         switch (sk->which)
878         {
879         case Z_SortKey_sortField:
880             yaz_log(log_level_sort, "key %d is of type sortField",
881                     i+1);
882             sort_criteria[i].numerical = 0;
883             sort_criteria[i].ord = 
884                 zebraExplain_lookup_attr_str(zh->reg->zei,
885                                              zinfo_index_category_sort,
886                                              -1, sk->u.sortField);
887             if (sks->which != Z_SortKeySpec_null
888                 && sort_criteria[i].ord == -1)
889             {
890                 zebra_setError(zh,
891                                YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
892                 return ZEBRA_FAIL;
893             }
894             break;
895         case Z_SortKey_elementSpec:
896             yaz_log(log_level_sort, "key %d is of type elementSpec",
897                     i+1);
898             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
899             return ZEBRA_FAIL;
900         case Z_SortKey_sortAttributes:
901             yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
902             res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
903
904                                      &sort_criteria[i].ord,
905                                      &sort_criteria[i].numerical);
906             if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
907                 return ZEBRA_FAIL;
908             break;
909         }
910         if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord,
911                                     &sort_criteria[i].index_type,
912                                     0, 0))
913         {
914             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
915             return ZEBRA_FAIL;
916         }
917     }
918     /* allocate space for each cmpare buf + one extra for tmp comparison */
919     for (i = 0; i<num_criteria; i++)
920     {
921         cmp_buf[i] = xmalloc(sset->sort_info->max_entries
922                              * SORT_IDX_ENTRYSIZE);
923         tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
924     }
925     rfd = rset_open (rset, RSETF_READ);
926     while (rset_read (rfd, &key, &termid))
927     {
928         zint this_sys = key.mem[sysno_mem_index];
929         if (log_level_searchhits)
930             key_logdump_txt(log_level_searchhits, &key, termid->name);
931         kno++;
932         if (this_sys != psysno)
933         {
934             (sset->hits)++;
935             psysno = this_sys;
936             resultSetInsertSort(zh, sset,
937                                 sort_criteria, num_criteria, psysno, cmp_buf,
938                                 tmp_cmp_buf);
939         }
940     }
941     rset_close (rfd);
942
943     for (i = 0; i<num_criteria; i++)
944     {
945         xfree(cmp_buf[i]);
946         xfree(tmp_cmp_buf[i]);
947     }
948
949     yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
950             kno, sset->hits);   
951     for (i = 0; i < numTerms; i++)
952         yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
953                  terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
954     *sort_status = Z_SortResponse_success;
955     return ZEBRA_OK;
956 }
957
958 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
959 {
960     ZebraSet s;
961
962     if ((s = resultSetGet (zh, resultSetId)))
963         return s->rset;
964     return NULL;
965 }
966
967 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
968                         RSET rset, NMEM nmem)
969 {
970     struct it_key key;
971     TERMID termid;
972     TERMID *terms;
973     zint kno = 0;
974     int numTerms = 0;
975     int n = 0;
976     int i;
977     ZebraRankClass rank_class;
978     struct zset_sort_info *sort_info;
979     const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
980     size_t sysno_mem_index = 0;
981
982     if (zh->m_staticrank)
983         sysno_mem_index = 1;
984
985     if (!log_level_set)
986         loglevels();
987     sort_info = zebraSet->sort_info;
988     sort_info->num_entries = 0;
989     zebraSet->hits = 0;
990     rset_getterms(rset, 0, 0, &n);
991     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
992     rset_getterms(rset, terms, n, &numTerms);
993
994
995     rank_class = zebraRankLookup(zh, rank_handler_name);
996     if (!rank_class)
997     {
998         yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
999         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1000         return ZEBRA_FAIL;
1001     }
1002     else
1003     {
1004         RSFD rfd = rset_open(rset, RSETF_READ);
1005         struct rank_control *rc = rank_class->control;
1006         int score;
1007         zint count = 0;
1008         
1009         void *handle =
1010             (*rc->begin) (zh->reg, rank_class->class_handle, rset, nmem,
1011                           terms, numTerms);
1012         zint psysno = 0;  /* previous doc id / sys no */
1013         zint pstaticrank = 0; /* previous static rank */
1014         int stop_flag = 0;
1015         while (rset_read(rfd, &key, &termid))
1016         {
1017             zint this_sys = key.mem[sysno_mem_index];
1018
1019             zint seqno = key.mem[key.len-1];
1020             kno++;
1021             if (log_level_searchhits)
1022                 key_logdump_txt(log_level_searchhits, &key, termid->name);
1023             if (this_sys != psysno) 
1024             {   /* new record .. */
1025                 if (rfd->counted_items > rset->hits_limit)
1026                     break;
1027                 if (psysno)
1028                 {   /* only if we did have a previous record */
1029                     score = (*rc->calc) (handle, psysno, pstaticrank,
1030                                          &stop_flag);
1031                     /* insert the hit. A=Ascending */
1032                     resultSetInsertRank (zh, sort_info, psysno, score, 'A');
1033                     count++;
1034                     if (stop_flag)
1035                         break;
1036                 }
1037                 psysno = this_sys;
1038                 if (zh->m_staticrank)
1039                     pstaticrank = key.mem[0];
1040             }
1041             (*rc->add) (handle, CAST_ZINT_TO_INT(seqno), termid);
1042         }
1043         /* no more items */
1044         if (psysno)
1045         {   /* we had - at least - one record */
1046             score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1047             /* insert the hit. A=Ascending */
1048             resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1049             count++;
1050         }
1051         (*rc->end) (zh->reg, handle);
1052         rset_close (rfd);
1053     }
1054     zebraSet->hits = rset->hits_count;
1055
1056     yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1057             ZINT_FORMAT " sysnos, rank",  kno, zebraSet->hits);
1058     for (i = 0; i < numTerms; i++)
1059     {
1060         yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1061                 ZINT_FORMAT,
1062                 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1063     }
1064     return ZEBRA_OK;
1065 }
1066
1067 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1068 {
1069     ZebraRankClass p = zh->reg->rank_classes;
1070     while (p && strcmp (p->control->name, name))
1071         p = p->next;
1072     if (p && !p->init_flag)
1073     {
1074         if (p->control->create)
1075             p->class_handle = (*p->control->create)(zh);
1076         p->init_flag = 1;
1077     }
1078     return p;
1079 }
1080
1081 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1082 {
1083     ZebraRankClass p = (ZebraRankClass) xmalloc (sizeof(*p));
1084     p->control = (struct rank_control *) xmalloc (sizeof(*p->control));
1085     memcpy (p->control, ctrl, sizeof(*p->control));
1086     p->control->name = xstrdup (ctrl->name);
1087     p->init_flag = 0;
1088     p->next = reg->rank_classes;
1089     reg->rank_classes = p;
1090 }
1091
1092 void zebraRankDestroy(struct zebra_register *reg)
1093 {
1094     ZebraRankClass p = reg->rank_classes;
1095     while (p)
1096     {
1097         ZebraRankClass p_next = p->next;
1098         if (p->init_flag && p->control->destroy)
1099             (*p->control->destroy)(reg, p->class_handle);
1100         xfree(p->control->name);
1101         xfree(p->control);
1102         xfree(p);
1103         p = p_next;
1104     }
1105     reg->rank_classes = NULL;
1106 }
1107
1108 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1109                                  zint *hits_array, int *approx_array)
1110 {
1111     int no = 0;
1112     int i;
1113     for (i = 0; i<rset->no_children; i++)
1114         no += trav_rset_for_termids(rset->children[i],
1115                                     (termid_array ? termid_array + no : 0),
1116                                     (hits_array ? hits_array + no : 0),
1117                                     (approx_array ? approx_array + no : 0));
1118     if (rset->term)
1119     {
1120         if (termid_array)
1121             termid_array[no] = rset->term;
1122         if (hits_array)
1123             hits_array[no] = rset->hits_count;
1124         if (approx_array)
1125             approx_array[no] = rset->hits_approx;
1126 #if 0
1127         yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1128                 " count=" ZINT_FORMAT,
1129                 rset, rset->term->name, rset->hits_limit, rset->hits_count);
1130 #endif
1131         no++;
1132     }
1133     return no;
1134 }
1135
1136 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1137                                    int *num_terms)
1138 {
1139     ZebraSet sset = resultSetGet(zh, setname);
1140     *num_terms = 0;
1141     if (sset)
1142     {
1143         *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1144         return ZEBRA_OK;
1145     }
1146     return ZEBRA_FAIL;
1147 }
1148
1149 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1150                                      int no, zint *count, int *approx,
1151                                      char *termbuf, size_t *termlen,
1152                                      const char **term_ref_id)
1153 {
1154     ZebraSet sset = resultSetGet(zh, setname);
1155     if (sset)
1156     {
1157         int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1158         if (no >= 0 && no < num_terms)
1159         {
1160             TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1161             zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1162             int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1163             
1164             trav_rset_for_termids(sset->rset, term_array,
1165                                   hits_array, approx_array);
1166
1167             if (count)
1168                 *count = hits_array[no];
1169             if (approx)
1170                 *approx = approx_array[no];
1171             if (termbuf)
1172             {
1173                 char *inbuf = term_array[no]->name;
1174                 size_t inleft = strlen(inbuf);
1175                 size_t outleft = *termlen - 1;
1176
1177                 if (zh->iconv_from_utf8 != 0)
1178                 {
1179                     char *outbuf = termbuf;
1180                     size_t ret;
1181                     
1182                     ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1183                                     &outbuf, &outleft);
1184                     if (ret == (size_t)(-1))
1185                         *termlen = 0;
1186                     else
1187                         *termlen = outbuf - termbuf;
1188                 }
1189                 else
1190                 {
1191                     if (inleft > outleft)
1192                         inleft = outleft;
1193                     *termlen = inleft;
1194                     memcpy(termbuf, inbuf, *termlen);
1195                 }
1196                 termbuf[*termlen] = '\0';
1197             }
1198             if (term_ref_id)
1199                 *term_ref_id = term_array[no]->ref_id;
1200
1201             xfree(term_array);
1202             xfree(hits_array);
1203             xfree(approx_array);
1204             return ZEBRA_OK;
1205         }
1206     }
1207     return ZEBRA_FAIL;
1208 }
1209
1210 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1211                                     zint sysno, zebra_snippets *snippets)
1212 {
1213     ZebraSet sset = resultSetGet(zh, setname);
1214     yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1215             setname, sysno);
1216     if (!sset)
1217         return ZEBRA_FAIL;
1218     else
1219     {
1220         struct rset_key_control *kc = zebra_key_control_create(zh);
1221         NMEM nmem = nmem_create();
1222         struct it_key key;
1223         RSET rsets[2], rset_comb;
1224         RSET rset_temp = rset_create_temp(nmem, kc, kc->scope, 
1225                                           res_get (zh->res, "setTmpDir"),0 );
1226         
1227         TERMID termid;
1228         RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1229         
1230         key.mem[0] = sysno;
1231         key.mem[1] = 0;
1232         key.mem[2] = 0;
1233         key.mem[3] = 0;
1234         key.len = 2;
1235         rset_write (rsfd, &key);
1236         rset_close (rsfd);
1237
1238         rsets[0] = rset_temp;
1239         rsets[1] = rset_dup(sset->rset);
1240         
1241         rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1242
1243         rsfd = rset_open(rset_comb, RSETF_READ);
1244
1245         while (rset_read(rsfd, &key, &termid))
1246         {
1247             if (termid)
1248             {
1249                 struct ord_list *ol;
1250                 for (ol = termid->ol; ol; ol = ol->next)
1251                 {
1252                     zebra_snippets_append(snippets, key.mem[key.len-1],
1253                                           ol->ord, termid->name);
1254                 }
1255             }
1256         }
1257         rset_close(rsfd);
1258         
1259         rset_delete(rset_comb);
1260         nmem_destroy(nmem);
1261         kc->dec(kc);
1262     }
1263     return ZEBRA_OK;
1264 }
1265
1266 /*
1267  * Local variables:
1268  * c-basic-offset: 4
1269  * indent-tabs-mode: nil
1270  * End:
1271  * vim: shiftwidth=4 tabstop=8 expandtab
1272  */
1273