65e527268aeb69f16e0ec3f37a76035c019bbe44
[idzebra-moved-to-github.git] / index / zsets.c
1 /* $Id: zsets.c,v 1.102 2006-05-18 12:03:05 adam Exp $
2    Copyright (C) 1995-2006
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26 #ifdef WIN32
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31
32 #include "index.h"
33 #include "rank.h"
34 #include <yaz/diagbib1.h>
35 #include <rset.h>
36
/* Size in bytes of one sort key buffer held in a zset_sort_entry. */
#define SORT_IDX_ENTRYSIZE 64

/* Number of sort key buffers held in a zset_sort_entry. */
#define ZSET_SORT_MAX_LEVEL 3
39
/* One term recorded on a result set by resultSetAddTerm(). */
struct zebra_set_term_entry
{
    int reg_type;   /* register type passed in by the caller */
    char *db;       /* copy of the database name */
    int set;        /* attribute set value passed in by the caller */
    int use;        /* use attribute value passed in by the caller */
    char *term;     /* copy of the term; 0 while the slot is unused */
};
47
48 struct zebra_set {
49     char *name;
50     RSET rset;
51     NMEM nmem;
52     NMEM rset_nmem; /* for creating the rsets in */
53     zint hits;
54     int num_bases;
55     char **basenames;
56     Z_RPNQuery *rpn;
57     Z_SortKeySpecList *sortSpec;
58     struct zset_sort_info *sort_info;
59     struct zebra_set_term_entry *term_entries;
60     int term_entries_max;
61     struct zebra_set *next;
62     int locked;
63
64     zint cache_position;  /* last position */
65     RSFD cache_rfd;       /* rfd (NULL if not existing) */
66     zint cache_psysno;    /* sysno for last position */
67     zint approx_limit;    /* limit before we do approx */
68 };
69
70 struct zset_sort_entry {
71     zint sysno;
72     int score;
73     char buf[ZSET_SORT_MAX_LEVEL][SORT_IDX_ENTRYSIZE];
74 };
75
/* Bounded, ordered list of the best entries of a result set. */
struct zset_sort_info
{
    int max_entries;                        /* capacity of both arrays below */
    int num_entries;                        /* entries currently in use */
    struct zset_sort_entry *all_entries;    /* backing storage for the entries */
    struct zset_sort_entry **entries;       /* pointers into all_entries, in order */
};
82
/* Non-zero once the module log levels below have been looked up. */
static int log_level_set = 0;

/* Log levels for the "sorting", "searchhits", "searchterms" and
   "resultsets" log modules; filled in by loglevels(). */
static int log_level_sort = 0;
static int log_level_searchhits = 0;
static int log_level_searchterms = 0;
static int log_level_resultsets = 0;
88
89 static void loglevels()
90 {
91     if (log_level_set)
92         return;
93     log_level_sort = yaz_log_module_level("sorting");
94     log_level_searchhits = yaz_log_module_level("searchhits");
95     log_level_searchterms = yaz_log_module_level("searchterms");
96     log_level_resultsets = yaz_log_module_level("resultsets");
97     log_level_set = 1;
98 }
99
100 ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
101                           Z_RPNQuery *rpn, ZebraSet sset)
102 {
103     RSET rset = 0;
104     oident *attrset;
105     Z_SortKeySpecList *sort_sequence;
106     int sort_status, i;
107     ZEBRA_RES res = ZEBRA_OK;
108
109     zh->hits = 0;
110
111     sort_sequence = (Z_SortKeySpecList *)
112         nmem_malloc(nmem, sizeof(*sort_sequence));
113     sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
114     sort_sequence->specs = (Z_SortKeySpec **)
115         nmem_malloc(nmem, sort_sequence->num_specs *
116                      sizeof(*sort_sequence->specs));
117     for (i = 0; i<sort_sequence->num_specs; i++)
118         sort_sequence->specs[i] = 0;
119     
120     attrset = oid_getentbyoid (rpn->attributeSetId);
121     res = rpn_search_top(zh, rpn->RPNStructure, attrset->value,
122                          nmem, rset_nmem,
123                          sort_sequence,
124                          sset->num_bases, sset->basenames,
125                          &rset);
126     if (res != ZEBRA_OK)
127     {
128         sset->rset = 0;
129         return res;
130     }
131     for (i = 0; sort_sequence->specs[i]; i++)
132         ;
133     sort_sequence->num_specs = i;
134     rset->hits_limit = sset->approx_limit;
135     if (!i)
136     {
137         res = resultSetRank (zh, sset, rset, rset_nmem);
138     }
139     else
140     {
141         res = resultSetSortSingle (zh, nmem, sset, rset,
142                                    sort_sequence, &sort_status);
143     }
144     sset->rset = rset;
145     return res;
146 }
147
148
149 ZEBRA_RES resultSetAddRPN (ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
150                            int num_bases, char **basenames,
151                            const char *setname)
152 {
153     ZebraSet zebraSet;
154     int i;
155     ZEBRA_RES res;
156
157     zh->hits = 0;
158
159     zebraSet = resultSetAdd(zh, setname, 1);
160     if (!zebraSet)
161         return ZEBRA_FAIL;
162     zebraSet->locked = 1;
163     zebraSet->rpn = 0;
164     zebraSet->nmem = m;
165     zebraSet->rset_nmem = nmem_create(); 
166
167     zebraSet->num_bases = num_bases;
168     zebraSet->basenames = 
169         nmem_malloc (zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
170     for (i = 0; i<num_bases; i++)
171         zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
172
173     res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
174                           rpn, zebraSet);
175     zh->hits = zebraSet->hits;
176     if (zebraSet->rset)
177         zebraSet->rpn = rpn;
178     zebraSet->locked = 0;
179     if (!zebraSet->rset)
180         return ZEBRA_FAIL;
181     return res;
182 }
183
184 void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type,
185                        const char *db, int set,
186                        int use, const char *term)
187 {
188     assert(zh); /* compiler shut up */
189     if (!s->nmem)
190         s->nmem = nmem_create ();
191     if (!s->term_entries)
192     {
193         int i;
194         s->term_entries_max = 1000;
195         s->term_entries =
196             nmem_malloc (s->nmem, s->term_entries_max * 
197                          sizeof(*s->term_entries));
198         for (i = 0; i < s->term_entries_max; i++)
199             s->term_entries[i].term = 0;
200     }
201     if (s->hits < s->term_entries_max)
202     {
203         s->term_entries[s->hits].reg_type = reg_type;
204         s->term_entries[s->hits].db = nmem_strdup (s->nmem, db);
205         s->term_entries[s->hits].set = set;
206         s->term_entries[s->hits].use = use;
207         s->term_entries[s->hits].term = nmem_strdup (s->nmem, term);
208     }
209     (s->hits)++;
210 }
211
212 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
213 {
214     ZebraSet s;
215     int i;
216
217     for (s = zh->sets; s; s = s->next)
218         if (!strcmp (s->name, name))
219             break;
220     
221     if (!log_level_set)
222         loglevels();
223     if (s)
224     {
225         yaz_log(log_level_resultsets, "updating result set %s", name);
226         if (!ov || s->locked)
227             return NULL;
228         if (s->rset)
229         {
230             if (s->cache_rfd)
231                 rset_close(s->cache_rfd);
232             rset_delete (s->rset);
233         }
234         if (s->rset_nmem)
235             nmem_destroy (s->rset_nmem);
236         if (s->nmem)
237             nmem_destroy (s->nmem);
238     }
239     else
240     {
241         const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
242
243         yaz_log(log_level_resultsets, "adding result set %s", name);
244         s = (ZebraSet) xmalloc (sizeof(*s));
245         s->next = zh->sets;
246         zh->sets = s;
247         s->name = (char *) xmalloc (strlen(name)+1);
248         strcpy (s->name, name);
249
250         s->sort_info = (struct zset_sort_info *)
251             xmalloc (sizeof(*s->sort_info));
252         s->sort_info->max_entries = atoi(sort_max_str);
253         if (s->sort_info->max_entries < 2)
254             s->sort_info->max_entries = 2;
255
256         s->sort_info->entries = (struct zset_sort_entry **)
257             xmalloc (sizeof(*s->sort_info->entries) *
258                      s->sort_info->max_entries);
259         s->sort_info->all_entries = (struct zset_sort_entry *)
260             xmalloc (sizeof(*s->sort_info->all_entries) *
261                      s->sort_info->max_entries);
262         for (i = 0; i < s->sort_info->max_entries; i++)
263             s->sort_info->entries[i] = s->sort_info->all_entries + i;
264     }
265     s->locked = 0;
266     s->term_entries = 0;
267     s->hits = 0;
268     s->rset = 0;
269     s->rset_nmem = 0;
270     s->nmem = 0;
271     s->rpn = 0;
272     s->sortSpec = 0;
273     s->cache_position = 0;
274     s->cache_rfd = 0;
275     s->approx_limit = zh->approx_limit;
276     return s;
277 }
278
279 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
280 {
281     ZebraSet s;
282
283     for (s = zh->sets; s; s = s->next)
284         if (!strcmp (s->name, name))
285         {
286             if (!s->term_entries && !s->rset && s->rpn)
287             {
288                 NMEM nmem = nmem_create ();
289                 yaz_log(log_level_resultsets, "research %s", name);
290                 if (!s->rset_nmem)
291                     s->rset_nmem=nmem_create();
292                 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
293                 if (s->rset && s->sortSpec)
294                 {
295                     int sort_status;
296                     yaz_log(log_level_resultsets, "resort %s", name);
297                     resultSetSortSingle (zh, nmem, s, s->rset, s->sortSpec,
298                                          &sort_status);
299                 }
300                 nmem_destroy (nmem);
301             }
302             return s;
303         }
304     return NULL;
305 }
306
307 void resultSetInvalidate (ZebraHandle zh)
308 {
309     ZebraSet s = zh->sets;
310     
311     yaz_log(log_level_resultsets, "invalidating result sets");
312     for (; s; s = s->next)
313     {
314         if (s->rset)
315         {
316             if (s->cache_rfd)
317                 rset_close(s->cache_rfd);
318             rset_delete (s->rset);
319         }
320         s->rset = 0;
321         s->cache_rfd = 0;
322         s->cache_position = 0;
323         if (s->rset_nmem)
324             nmem_destroy(s->rset_nmem);
325         s->rset_nmem=0;
326     }
327 }
328
329 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
330 {
331     ZebraSet * ss = &zh->sets;
332     int i;
333     
334     if (statuses)
335         for (i = 0; i<num; i++)
336             statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
337     while (*ss)
338     {
339         int i = -1;
340         ZebraSet s = *ss;
341         if (num >= 0)
342         {
343             for (i = 0; i<num; i++)
344                 if (!strcmp (s->name, names[i]))
345                 {
346                     if (statuses)
347                         statuses[i] = Z_DeleteStatus_success;
348                     i = -1;
349                     break;
350                 }
351         }
352         if (i < 0)
353         {
354             *ss = s->next;
355             
356             xfree (s->sort_info->all_entries);
357             xfree (s->sort_info->entries);
358             xfree (s->sort_info);
359             
360             if (s->nmem)
361                 nmem_destroy (s->nmem);
362             if (s->rset)
363             {
364                 if (s->cache_rfd)
365                     rset_close(s->cache_rfd);
366                 rset_delete (s->rset);
367             }
368             if (s->rset_nmem)
369                 nmem_destroy(s->rset_nmem);
370             xfree (s->name);
371             xfree (s);
372         }
373         else
374             ss = &s->next;
375     }
376 }
377
378 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
379                                                  const char *name, 
380                                                  zint start, int num)
381 {
382     zint pos_small[10];
383     zint *pos = pos_small;
384     ZebraMetaRecord *mr;
385     int i;
386
387     if (num > 10000 || num <= 0)
388         return 0;
389
390     if (num > 10)
391         pos = xmalloc(sizeof(*pos) * num);
392     
393     for (i = 0; i<num; i++)
394         pos[i] = start+i;
395
396     mr = zebra_meta_records_create(zh, name, num, pos);
397     
398     if (num > 10)
399         xfree(pos);
400     return mr;
401 }
402
403 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name, 
404                                            int num, zint *positions)
405 {
406     ZebraSet sset;
407     ZebraMetaRecord *sr = 0;
408     RSET rset;
409     int i;
410     struct zset_sort_info *sort_info;
411     size_t sysno_mem_index = 0;
412
413     if (zh->m_staticrank)
414         sysno_mem_index = 1;
415
416     if (!log_level_set)
417         loglevels();
418     if (!(sset = resultSetGet (zh, name)))
419         return NULL;
420     if (!(rset = sset->rset))
421     {
422         if (!sset->term_entries)
423             return 0;
424         sr = (ZebraMetaRecord *) xmalloc (sizeof(*sr) * num);
425         for (i = 0; i<num; i++)
426         {
427             sr[i].sysno = 0;
428             sr[i].score = -1;
429             sr[i].term = 0;
430             sr[i].db = 0;
431
432             if (positions[i] <= sset->term_entries_max)
433             {
434                 sr[i].term = sset->term_entries[positions[i]-1].term;
435                 sr[i].db = sset->term_entries[positions[i]-1].db;
436             }
437         }
438     }
439     else
440     {
441         sr = (ZebraMetaRecord *) xmalloc (sizeof(*sr) * num);
442         for (i = 0; i<num; i++)
443         {
444             sr[i].sysno = 0;
445             sr[i].score = -1;
446             sr[i].term = 0;
447             sr[i].db = 0;
448         }
449         sort_info = sset->sort_info;
450         if (sort_info)
451         {
452             zint position;
453             
454             for (i = 0; i<num; i++)
455             {
456                 position = positions[i];
457                 if (position > 0 && position <= sort_info->num_entries)
458                 {
459                     yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
460                             " (sorted)", position);
461                     sr[i].sysno = sort_info->entries[position-1]->sysno;
462                     sr[i].score = sort_info->entries[position-1]->score;
463                 }
464             }
465         }
466         /* did we really get all entries using sort ? */
467         for (i = 0; i<num; i++)
468         {
469             if (!sr[i].sysno)
470                 break;
471         }
472         if (i < num) /* nope, get the rest, unsorted - sorry */
473         {
474             zint position = 0;
475             int num_i = 0;
476             zint psysno = 0;
477             RSFD rfd;
478             struct it_key key;
479             
480             if (sort_info)
481                 position = sort_info->num_entries;
482             while (num_i < num && positions[num_i] <= position)
483                 num_i++;
484             
485             if (sset->cache_rfd &&
486                 num_i < num && positions[num_i] > sset->cache_position)
487             {
488                 position = sset->cache_position;
489                 rfd = sset->cache_rfd;
490                 psysno = sset->cache_psysno;
491             }
492             else
493             {
494                 if (sset->cache_rfd)
495                     rset_close(sset->cache_rfd);
496                 rfd = rset_open (rset, RSETF_READ);
497             }
498             while (num_i < num && rset_read (rfd, &key, 0))
499             {
500                 zint this_sys = key.mem[sysno_mem_index];
501                 if (this_sys != psysno)
502                 {
503                     psysno = this_sys;
504                     if (sort_info)
505                     {
506                         /* determine we alreay have this in our set */
507                         for (i = sort_info->num_entries; --i >= 0; )
508                             if (psysno == sort_info->entries[i]->sysno)
509                                 break;
510                         if (i >= 0)
511                             continue;
512                     }
513                     position++;
514                     assert (num_i < num);
515                     if (position == positions[num_i])
516                     {
517                         sr[num_i].sysno = psysno;
518                         yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
519                         sr[num_i].score = -1;
520                         num_i++;
521                     }
522                 }
523             }
524             sset->cache_position = position;
525             sset->cache_psysno = psysno;
526             sset->cache_rfd = rfd;
527         }
528     }
529     return sr;
530 }
531
532 void zebra_meta_records_destroy (ZebraHandle zh, ZebraMetaRecord *records,
533                                  int num)
534 {
535     assert(zh); /* compiler shut up about unused arg */
536     xfree (records);
537 }
538
/* One resolved sort criterion. */
struct sortKeyInfo
{
    int relation;   /* 'A' (ascending) or 'D' (descending) */
    int ord;        /* ordinal selecting the sort index register to read */
    int numerical;  /* non-zero: compare keys as numbers, not as bytes */
};
548
549 void resultSetInsertSort (ZebraHandle zh, ZebraSet sset,
550                           struct sortKeyInfo *criteria, int num_criteria,
551                           zint sysno)
552 {
553     struct zset_sort_entry this_entry;
554     struct zset_sort_entry *new_entry = NULL;
555     struct zset_sort_info *sort_info = sset->sort_info;
556     int i, j;
557
558     sortIdx_sysno (zh->reg->sortIdx, sysno);
559     for (i = 0; i<num_criteria; i++)
560     {
561         sortIdx_type (zh->reg->sortIdx, criteria[i].ord);
562         sortIdx_read (zh->reg->sortIdx, this_entry.buf[i]);
563     }
564     i = sort_info->num_entries;
565     while (--i >= 0)
566     {
567         int rel = 0;
568         for (j = 0; j<num_criteria; j++)
569         {
570             if (criteria[j].numerical)
571             {
572                 double diff = atof(this_entry.buf[j]) -
573                               atof(sort_info->entries[i]->buf[j]);
574                 rel = 0;
575                 if (diff > 0.0)
576                     rel = 1;
577                 else if (diff < 0.0)
578                     rel = -1;
579             }
580             else
581             {
582                 rel = memcmp (this_entry.buf[j], sort_info->entries[i]->buf[j],
583                           SORT_IDX_ENTRYSIZE);
584             }
585             if (rel)
586                 break;
587         }       
588         if (!rel)
589             break;
590         if (criteria[j].relation == 'A')
591         {
592             if (rel > 0)
593                 break;
594         }
595         else if (criteria[j].relation == 'D')
596         {
597             if (rel < 0)
598                 break;
599         }
600     }
601     ++i;
602     j = sort_info->max_entries;
603     if (i == j)
604         return;
605
606     if (sort_info->num_entries == j)
607         --j;
608     else
609         j = (sort_info->num_entries)++;
610     new_entry = sort_info->entries[j];
611     while (j != i)
612     {
613         sort_info->entries[j] = sort_info->entries[j-1];
614         --j;
615     }
616     sort_info->entries[i] = new_entry;
617     assert (new_entry);
618     for (i = 0; i<num_criteria; i++)
619         memcpy (new_entry->buf[i], this_entry.buf[i], SORT_IDX_ENTRYSIZE);
620     new_entry->sysno = sysno;
621     new_entry->score = -1;
622 }
623
624 void resultSetInsertRank (ZebraHandle zh, struct zset_sort_info *sort_info,
625                           zint sysno, int score, int relation)
626 {
627     struct zset_sort_entry *new_entry = NULL;
628     int i, j;
629     assert(zh); /* compiler shut up about unused arg */
630
631     i = sort_info->num_entries;
632     while (--i >= 0)
633     {
634         int rel = 0;
635
636         rel = score - sort_info->entries[i]->score;
637
638         if (relation == 'D')
639         {
640             if (rel >= 0)
641                 break;
642         }
643         else if (relation == 'A')
644         {
645             if (rel <= 0)
646                 break;
647         }
648     }
649     ++i;
650     j = sort_info->max_entries;
651     if (i == j)
652         return;
653
654     if (sort_info->num_entries == j)
655         --j;
656     else
657         j = (sort_info->num_entries)++;
658     
659     new_entry = sort_info->entries[j];
660     while (j != i)
661     {
662         sort_info->entries[j] = sort_info->entries[j-1];
663         --j;
664     }
665     sort_info->entries[i] = new_entry;
666     assert (new_entry);
667     new_entry->sysno = sysno;
668     new_entry->score = score;
669 }
670
671 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
672 {
673     Z_RPNQuery *dst = 0;
674     ODR encode = odr_createmem(ODR_ENCODE);
675     ODR decode = odr_createmem(ODR_DECODE);
676
677     if (z_RPNQuery(encode, &src, 0, 0))
678     {
679         int len;
680         char *buf = odr_getbuf(encode, &len, 0);
681
682         if (buf)
683         {
684             odr_setbuf(decode, buf, len, 0);
685             z_RPNQuery(decode, &dst, 0, 0);
686         }
687     }
688     nmem_transfer(nmem, decode->mem);
689     odr_destroy(encode);
690     odr_destroy(decode);
691     return dst;
692 }
693
694 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
695 {
696     Z_SortKeySpecList *dst = 0;
697     ODR encode = odr_createmem(ODR_ENCODE);
698     ODR decode = odr_createmem(ODR_DECODE);
699
700     if (z_SortKeySpecList(encode, &src, 0, 0))
701     {
702         int len;
703         char *buf = odr_getbuf(encode, &len, 0);
704
705         if (buf)
706         {
707             odr_setbuf(decode, buf, len, 0);
708             z_SortKeySpecList(decode, &dst, 0, 0);
709         }
710     }
711     nmem_transfer(nmem, decode->mem);
712     odr_destroy(encode);
713     odr_destroy(decode);
714     return dst;
715 }
716
717 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
718                         ZebraSet rset)
719 {
720     ZebraSet nset;
721     int i;
722
723     nset = resultSetAdd(zh, setname, 1);
724     if (!nset)
725         return 0;
726
727     nset->nmem = nmem_create();
728
729     nset->num_bases = rset->num_bases;
730     nset->basenames = 
731         nmem_malloc (nset->nmem, nset->num_bases * sizeof(*rset->basenames));
732     for (i = 0; i<rset->num_bases; i++)
733         nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
734
735     if (rset->rset)
736         nset->rset = rset_dup(rset->rset);
737     if (rset->rpn)
738         nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
739     return nset;
740 }
741
742 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
743                         int num_input_setnames, const char **input_setnames,
744                         const char *output_setname,
745                         Z_SortKeySpecList *sort_sequence, int *sort_status)
746 {
747     ZebraSet sset;
748     RSET rset;
749
750     if (num_input_setnames == 0)
751     {
752         zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
753         return ZEBRA_FAIL;
754     }
755     if (num_input_setnames > 1)
756     {
757         zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
758         return ZEBRA_FAIL;
759     }
760     if (!log_level_set)
761         loglevels();
762     yaz_log(log_level_sort, "result set sort input=%s output=%s",
763           *input_setnames, output_setname);
764     sset = resultSetGet (zh, input_setnames[0]);
765     if (!sset)
766     {
767         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
768                        input_setnames[0]);
769         return ZEBRA_FAIL;
770     }
771     if (!(rset = sset->rset))
772     {
773         zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
774                        input_setnames[0]);
775         return ZEBRA_FAIL;
776     }
777     if (strcmp (output_setname, input_setnames[0]))
778         sset = resultSetClone(zh, output_setname, sset);
779     sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
780     return resultSetSortSingle (zh, nmem, sset, rset, sort_sequence,
781                                 sort_status);
782 }
783
784 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
785                               ZebraSet sset, RSET rset,
786                               Z_SortKeySpecList *sort_sequence,
787                               int *sort_status)
788 {
789     int i;
790     int n = 0;
791     zint kno = 0;
792     zint psysno = 0;
793     struct it_key key;
794     struct sortKeyInfo sort_criteria[3];
795     int num_criteria;
796     RSFD rfd;
797     TERMID termid;
798     TERMID *terms;
799     int numTerms = 0;
800     size_t sysno_mem_index = 0;
801
802     if (zh->m_staticrank)
803         sysno_mem_index = 1;
804
805
806     assert(nmem); /* compiler shut up about unused param */
807     sset->sort_info->num_entries = 0;
808
809     rset_getterms(rset, 0, 0, &n);
810     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
811     rset_getterms(rset, terms, n, &numTerms);
812
813     sset->hits = 0;
814     num_criteria = sort_sequence->num_specs;
815     if (num_criteria > 3)
816         num_criteria = 3;
817     for (i = 0; i < num_criteria; i++)
818     {
819         Z_SortKeySpec *sks = sort_sequence->specs[i];
820         Z_SortKey *sk;
821
822         if (*sks->sortRelation == Z_SortKeySpec_ascending)
823             sort_criteria[i].relation = 'A';
824         else if (*sks->sortRelation == Z_SortKeySpec_descending)
825             sort_criteria[i].relation = 'D';
826         else
827         {
828             zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
829             return ZEBRA_FAIL;
830         }
831         if (sks->sortElement->which == Z_SortElement_databaseSpecific)
832         {
833             zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
834             return ZEBRA_FAIL;
835         }
836         else if (sks->sortElement->which != Z_SortElement_generic)
837         {
838             zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
839             return ZEBRA_FAIL;
840         }       
841         sk = sks->sortElement->u.generic;
842         switch (sk->which)
843         {
844         case Z_SortKey_sortField:
845             yaz_log(log_level_sort, "key %d is of type sortField",
846                     i+1);
847             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
848             return ZEBRA_FAIL;
849         case Z_SortKey_elementSpec:
850             yaz_log(log_level_sort, "key %d is of type elementSpec",
851                     i+1);
852             zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
853             return ZEBRA_FAIL;
854         case Z_SortKey_sortAttributes:
855             yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
856             if (1)
857             {
858                 int ord;
859                 int use = zebra_maps_sort(zh->reg->zebra_maps,
860                                           sk->u.sortAttributes,
861                                           &sort_criteria[i].numerical);
862                 yaz_log(log_level_sort, "use value = %d", use);
863                 if (use == -1)
864                 {
865                     zebra_setError(
866                         zh, YAZ_BIB1_USE_ATTRIBUTE_REQUIRED_BUT_NOT_SUPPLIED, 0); 
867                     return ZEBRA_FAIL;
868                 }
869                 ord = zebraExplain_lookup_attr_su_any_index(zh->reg->zei, 
870                                                             VAL_IDXPATH, use);
871                 if (ord == -1)
872                 {
873                     zebra_setError(
874                         zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
875                     return ZEBRA_FAIL;
876                 }
877                 sort_criteria[i].ord = ord;
878             }
879             break;
880         }
881     }
882     rfd = rset_open (rset, RSETF_READ);
883     while (rset_read (rfd, &key, &termid))
884     {
885         zint this_sys = key.mem[sysno_mem_index];
886         if (log_level_searchhits)
887             key_logdump_txt(log_level_searchhits, &key, termid->name);
888         kno++;
889         if (this_sys != psysno)
890         {
891             (sset->hits)++;
892             psysno = this_sys;
893             resultSetInsertSort (zh, sset,
894                                  sort_criteria, num_criteria, psysno);
895         }
896     }
897     rset_close (rfd);
898     yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
899             kno, sset->hits);   
900     for (i = 0; i < numTerms; i++)
901         yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
902                  terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
903     *sort_status = Z_SortResponse_success;
904     return ZEBRA_OK;
905 }
906
907 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
908 {
909     ZebraSet s;
910
911     if ((s = resultSetGet (zh, resultSetId)))
912         return s->rset;
913     return NULL;
914 }
915
916 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
917                         RSET rset, NMEM nmem)
918 {
919     struct it_key key;
920     TERMID termid;
921     TERMID *terms;
922     zint kno = 0;
923     int numTerms = 0;
924     int n = 0;
925     int i;
926     ZebraRankClass rank_class;
927     struct zset_sort_info *sort_info;
928     const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
929     size_t sysno_mem_index = 0;
930
931     if (zh->m_staticrank)
932         sysno_mem_index = 1;
933
934     if (!log_level_set)
935         loglevels();
936     sort_info = zebraSet->sort_info;
937     sort_info->num_entries = 0;
938     zebraSet->hits = 0;
939     rset_getterms(rset, 0, 0, &n);
940     terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
941     rset_getterms(rset, terms, n, &numTerms);
942
943
944     rank_class = zebraRankLookup(zh, rank_handler_name);
945     if (!rank_class)
946     {
947         yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
948         zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
949         return ZEBRA_FAIL;
950     }
951     else
952     {
953         RSFD rfd = rset_open(rset, RSETF_READ);
954         struct rank_control *rc = rank_class->control;
955         double score;
956         zint count = 0;
957         
958         void *handle =
959             (*rc->begin) (zh->reg, rank_class->class_handle, rset, nmem,
960                           terms, numTerms);
961         zint psysno = 0;  /* previous doc id / sys no */
962         zint pstaticrank = 0; /* previous static rank */
963         int stop_flag = 0;
964         while (rset_read(rfd, &key, &termid))
965         {
966             zint this_sys = key.mem[sysno_mem_index];
967
968             zint seqno = key.mem[key.len-1];
969             kno++;
970             if (log_level_searchhits)
971                 key_logdump_txt(log_level_searchhits, &key, termid->name);
972             if (this_sys != psysno) 
973             {   /* new record .. */
974                 if (rfd->counted_items > rset->hits_limit)
975                     break;
976                 if (psysno)
977                 {   /* only if we did have a previous record */
978                     score = (*rc->calc) (handle, psysno, pstaticrank,
979                                          &stop_flag);
980                     /* insert the hit. A=Ascending */
981                     resultSetInsertRank (zh, sort_info, psysno, score, 'A');
982                     count++;
983                     if (stop_flag)
984                         break;
985                 }
986                 psysno = this_sys;
987                 if (zh->m_staticrank)
988                     pstaticrank = key.mem[0];
989             }
990             (*rc->add) (handle, CAST_ZINT_TO_INT(seqno), termid);
991         }
992         /* no more items */
993         if (psysno)
994         {   /* we had - at least - one record */
995             score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
996             /* insert the hit. A=Ascending */
997             resultSetInsertRank(zh, sort_info, psysno, score, 'A');
998             count++;
999         }
1000         (*rc->end) (zh->reg, handle);
1001         rset_close (rfd);
1002     }
1003     zebraSet->hits = rset->hits_count;
1004
1005     yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1006             ZINT_FORMAT " sysnos, rank",  kno, zebraSet->hits);
1007     for (i = 0; i < numTerms; i++)
1008     {
1009         yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1010                 ZINT_FORMAT,
1011                 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1012     }
1013     return ZEBRA_OK;
1014 }
1015
1016 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1017 {
1018     ZebraRankClass p = zh->reg->rank_classes;
1019     while (p && strcmp (p->control->name, name))
1020         p = p->next;
1021     if (p && !p->init_flag)
1022     {
1023         if (p->control->create)
1024             p->class_handle = (*p->control->create)(zh);
1025         p->init_flag = 1;
1026     }
1027     return p;
1028 }
1029
1030 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1031 {
1032     ZebraRankClass p = (ZebraRankClass) xmalloc (sizeof(*p));
1033     p->control = (struct rank_control *) xmalloc (sizeof(*p->control));
1034     memcpy (p->control, ctrl, sizeof(*p->control));
1035     p->control->name = xstrdup (ctrl->name);
1036     p->init_flag = 0;
1037     p->next = reg->rank_classes;
1038     reg->rank_classes = p;
1039 }
1040
1041 void zebraRankDestroy(struct zebra_register *reg)
1042 {
1043     ZebraRankClass p = reg->rank_classes;
1044     while (p)
1045     {
1046         ZebraRankClass p_next = p->next;
1047         if (p->init_flag && p->control->destroy)
1048             (*p->control->destroy)(reg, p->class_handle);
1049         xfree(p->control->name);
1050         xfree(p->control);
1051         xfree(p);
1052         p = p_next;
1053     }
1054     reg->rank_classes = NULL;
1055 }
1056
1057 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1058                                  zint *hits_array, int *approx_array)
1059 {
1060     int no = 0;
1061     int i;
1062     for (i = 0; i<rset->no_children; i++)
1063         no += trav_rset_for_termids(rset->children[i],
1064                                     (termid_array ? termid_array + no : 0),
1065                                     (hits_array ? hits_array + no : 0),
1066                                     (approx_array ? approx_array + no : 0));
1067     if (rset->term)
1068     {
1069         if (termid_array)
1070             termid_array[no] = rset->term;
1071         if (hits_array)
1072             hits_array[no] = rset->hits_count;
1073         if (approx_array)
1074             approx_array[no] = rset->hits_approx;
1075 #if 0
1076         yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1077                 " count=" ZINT_FORMAT,
1078                 rset, rset->term->name, rset->hits_limit, rset->hits_count);
1079 #endif
1080         no++;
1081     }
1082     return no;
1083 }
1084
1085 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1086                                    int *num_terms)
1087 {
1088     ZebraSet sset = resultSetGet(zh, setname);
1089     *num_terms = 0;
1090     if (sset)
1091     {
1092         *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1093         return ZEBRA_OK;
1094     }
1095     return ZEBRA_FAIL;
1096 }
1097
1098 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1099                                      int no, zint *count, int *approx,
1100                                      char *termbuf, size_t *termlen,
1101                                      const char **term_ref_id)
1102 {
1103     ZebraSet sset = resultSetGet(zh, setname);
1104     if (sset)
1105     {
1106         int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1107         if (no >= 0 && no < num_terms)
1108         {
1109             TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1110             zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1111             int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1112             
1113             trav_rset_for_termids(sset->rset, term_array,
1114                                   hits_array, approx_array);
1115
1116             if (count)
1117                 *count = hits_array[no];
1118             if (approx)
1119                 *approx = approx_array[no];
1120             if (termbuf)
1121             {
1122                 char *inbuf = term_array[no]->name;
1123                 size_t inleft = strlen(inbuf);
1124                 size_t outleft = *termlen - 1;
1125
1126                 if (zh->iconv_from_utf8 != 0)
1127                 {
1128                     char *outbuf = termbuf;
1129                     size_t ret;
1130                     
1131                     ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1132                                     &outbuf, &outleft);
1133                     if (ret == (size_t)(-1))
1134                         *termlen = 0;
1135                     else
1136                         *termlen = outbuf - termbuf;
1137                 }
1138                 else
1139                 {
1140                     if (inleft > outleft)
1141                         inleft = outleft;
1142                     *termlen = inleft;
1143                     memcpy(termbuf, inbuf, *termlen);
1144                 }
1145                 termbuf[*termlen] = '\0';
1146             }
1147             if (term_ref_id)
1148                 *term_ref_id = term_array[no]->ref_id;
1149
1150             xfree(term_array);
1151             xfree(hits_array);
1152             xfree(approx_array);
1153             return ZEBRA_OK;
1154         }
1155     }
1156     return ZEBRA_FAIL;
1157 }
1158
1159 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1160                                     zint sysno, zebra_snippets *snippets)
1161 {
1162     ZebraSet sset = resultSetGet(zh, setname);
1163     yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1164             setname, sysno);
1165     if (!sset)
1166         return ZEBRA_FAIL;
1167     else
1168     {
1169         struct rset_key_control *kc = zebra_key_control_create(zh);
1170         NMEM nmem = nmem_create();
1171         struct it_key key;
1172         RSET rsets[2], rset_comb;
1173         RSET rset_temp = rstemp_create(nmem, kc, kc->scope, 
1174                                        res_get (zh->res, "setTmpDir"),0 );
1175         
1176         TERMID termid;
1177         RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1178         
1179         key.mem[0] = sysno;
1180         key.mem[1] = 0;
1181         key.mem[2] = 0;
1182         key.mem[3] = 0;
1183         key.len = 2;
1184         rset_write (rsfd, &key);
1185         rset_close (rsfd);
1186
1187         rsets[0] = rset_temp;
1188         rsets[1] = rset_dup(sset->rset);
1189         
1190         rset_comb = rsmulti_and_create(nmem, kc, kc->scope, 2, rsets);
1191
1192         rsfd = rset_open(rset_comb, RSETF_READ);
1193
1194         while (rset_read(rsfd, &key, &termid))
1195         {
1196             if (termid)
1197             {
1198                 struct ord_list *ol;
1199                 for (ol = termid->ol; ol; ol = ol->next)
1200                 {
1201                     zebra_snippets_append(snippets, key.mem[key.len-1],
1202                                           ol->ord, termid->name);
1203                 }
1204             }
1205         }
1206         rset_close(rsfd);
1207         
1208         rset_delete(rset_comb);
1209         nmem_destroy(nmem);
1210         kc->dec(kc);
1211     }
1212     return ZEBRA_OK;
1213 }
1214
1215 /*
1216  * Local variables:
1217  * c-basic-offset: 4
1218  * indent-tabs-mode: nil
1219  * End:
1220  * vim: shiftwidth=4 tabstop=8 expandtab
1221  */
1222