Remove unused code section
[pazpar2-moved-to-github.git] / src / reclists.c
1 /* This file is part of Pazpar2.
2    Copyright (C) 2006-2013 Index Data
3
4 Pazpar2 is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #include <assert.h>
21
22 #if HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25
26 #include <yaz/yaz-util.h>
27
28 #include "ppmutex.h"
29 #include "session.h"
30 #include "reclists.h"
31 #include "jenkins_hash.h"
32
33 struct reclist
34 {
35     struct reclist_bucket **hashtable;
36     unsigned hash_size;
37
38     int num_records;
39     struct reclist_bucket *sorted_list;
40     struct reclist_bucket *sorted_ptr;
41     NMEM nmem;
42     YAZ_MUTEX mutex;
43 };
44
/*
 * One hash table entry. A bucket sits on exactly one hash chain and may
 * additionally be linked onto the list's sorted chain.
 */
struct reclist_bucket
{
    struct record_cluster *record;        /* cluster this bucket refers to */
    struct reclist_bucket *hash_next;     /* next bucket on the same hash chain */
    struct reclist_bucket *sorted_next;   /* next bucket on the sorted chain */
    struct reclist_sortparms *sort_parms; /* sort criteria read by reclist_cmp() */
};
52
53 static void append_merge_keys(struct record_metadata_attr **p,
54                               const struct record_metadata_attr *a,
55                               NMEM nmem)
56 {
57     for (; a; a = a->next)
58     {
59         struct record_metadata_attr **pi = p;
60         for (; *pi; pi = &(*pi)->next)
61             if (!strcmp((*pi)->value, a->value))
62                 break;
63         if (!*pi)
64         {
65             *pi = (struct record_metadata_attr *) nmem_malloc(nmem, sizeof(**p));
66             (*pi)->name = nmem_strdup_null(nmem, a->name);
67             (*pi)->value = nmem_strdup_null(nmem, a->value);
68             (*pi)->next = 0;
69         }
70     }
71 }
72
73 struct reclist_sortparms *reclist_parse_sortparms(NMEM nmem, const char *parms,
74                                                   struct conf_service *service)
75 {
76     struct reclist_sortparms *res = 0;
77     struct reclist_sortparms **rp = &res;
78
79     if (strlen(parms) > 256)
80         return 0;
81     while (*parms)
82     {
83         char parm[256];
84         char *pp;
85         const char *cpp;
86         int increasing = 0;
87         int i;
88         int offset = 0;
89         enum conf_sortkey_type type = Metadata_sortkey_string;
90         struct reclist_sortparms *new;
91
92         if (!(cpp = strchr(parms, ',')))
93             cpp = parms + strlen(parms);
94         strncpy(parm, parms, cpp - parms);
95         parm[cpp-parms] = '\0';
96
97         if ((pp = strchr(parm, ':')))
98         {
99             if (pp[1] == '1')
100                 increasing = 1;
101             else if (pp[1] == '0')
102                 increasing = 0;
103             else
104             {
105                 yaz_log(YLOG_FATAL, "Bad sortkey modifier: %s", parm);
106                 return 0;
107             }
108
109             if (pp[2])
110             {
111                 if (pp[2] == 'p')
112                     type = Metadata_sortkey_position;
113                 else
114                     yaz_log(YLOG_FATAL, "Bad sortkey modifier: %s", parm);
115             }
116             *pp = '\0';
117         }
118         if (type != Metadata_sortkey_position)
119         {
120             if (!strcmp(parm, "relevance"))
121             {
122                 type = Metadata_sortkey_relevance;
123             }
124             else if (!strcmp(parm, "position"))
125             {
126                 type = Metadata_sortkey_position;
127             }
128             else
129             {
130                 for (i = 0; i < service->num_sortkeys; i++)
131                 {
132                     struct conf_sortkey *sk = &service->sortkeys[i];
133                     if (!strcmp(sk->name, parm))
134                     {
135                         type = sk->type;
136                         if (type == Metadata_sortkey_skiparticle)
137                             type = Metadata_sortkey_string;
138                         break;
139                     }
140                 }
141                 if (i >= service->num_sortkeys)
142                 {
143                     yaz_log(YLOG_FATAL, "Sortkey not defined in service: %s",
144                             parm);
145                     return 0;
146                 }
147                 offset = i;
148             }
149         }
150         new = *rp = nmem_malloc(nmem, sizeof(struct reclist_sortparms));
151         new->next = 0;
152         new->offset = offset;
153         new->type = type;
154         new->increasing = increasing;
155         new->name = nmem_strdup(nmem, parm);
156         rp = &new->next;
157         if (*(parms = cpp))
158             parms++;
159     }
160     return res;
161 }
162
163 static int reclist_cmp(const void *p1, const void *p2)
164 {
165     struct reclist_sortparms *sortparms =
166         (*(struct reclist_bucket **) p1)->sort_parms;
167     struct record_cluster *r1 = (*(struct reclist_bucket**) p1)->record;
168     struct record_cluster *r2 = (*(struct reclist_bucket**) p2)->record;
169     struct reclist_sortparms *s;
170     int res = 0;
171
172     for (s = sortparms; s && res == 0; s = s->next)
173     {
174         union data_types *ut1 = r1->sortkeys[s->offset];
175         union data_types *ut2 = r2->sortkeys[s->offset];
176         const char *s1, *s2;
177         switch (s->type)
178         {
179         case Metadata_sortkey_relevance:
180             res = r2->relevance_score - r1->relevance_score;
181             break;
182         case Metadata_sortkey_string:
183             s1 = ut1 ? ut1->text.sort : "";
184             s2 = ut2 ? ut2->text.sort : "";
185             res = strcmp(s2, s1);
186             if (res)
187             {
188                 if (s->increasing)
189                     res *= -1;
190             }
191             break;
192         case Metadata_sortkey_numeric:
193             if (ut1 && ut2)
194             {
195                 if (s->increasing)
196                     res = ut1->number.min  - ut2->number.min;
197                 else
198                     res = ut2->number.max  - ut1->number.max;
199             }
200             else if (ut1 && !ut2)
201                 res = -1;
202             else if (!ut1 && ut2)
203                 res = 1;
204             else
205                 res = 0;
206             break;
207         case Metadata_sortkey_position:
208             if (r1->records && r2->records)
209             {
210                 int pos1 = 0, pos2 = 0;
211                 struct record *rec;
212                 for (rec = r1->records; rec; rec = rec->next)
213                     if (pos1 == 0 || rec->position < pos1)
214                         pos1 = rec->position;
215                 for (rec = r2->records; rec; rec = rec->next)
216                     if (pos2 == 0 || rec->position < pos2)
217                         pos2 = rec->position;
218                 res = pos1 - pos2;
219             }
220             break;
221         default:
222             yaz_log(YLOG_WARN, "Bad sort type: %d", s->type);
223             res = 0;
224             break;
225         }
226     }
227     if (res == 0)
228         res = strcmp(r1->recid, r2->recid);
229     return res;
230 }
231
232 void reclist_limit(struct reclist *l, struct session *se, int lazy)
233 {
234     unsigned i;
235     int num = 0;
236     struct reclist_bucket **pp = &l->sorted_list;
237
238     reclist_enter(l);
239
240     if (!lazy || !*pp)
241     {
242         for (i = 0; i < l->hash_size; i++)
243         {
244             struct reclist_bucket *p;
245             for (p = l->hashtable[i]; p; p = p->hash_next)
246             {
247                 if (session_check_cluster_limit(se, p->record))
248                 {
249                     *pp = p;
250                     pp = &p->sorted_next;
251                     num++;
252                 }
253             }
254         }
255         *pp = 0;
256     }
257     l->num_records = num;
258     reclist_leave(l);
259 }
260
261 void reclist_sort(struct reclist *l, struct reclist_sortparms *parms)
262 {
263     struct reclist_bucket **flatlist = xmalloc(sizeof(*flatlist) * l->num_records);
264     struct reclist_bucket *ptr;
265     struct reclist_bucket **prev;
266     int i = 0;
267
268     reclist_enter(l);
269
270     ptr = l->sorted_list;
271     prev = &l->sorted_list;
272     while (ptr)
273     {
274         ptr->sort_parms = parms;
275         flatlist[i] = ptr;
276         ptr = ptr->sorted_next;
277         i++;
278     }
279     assert(i == l->num_records);
280
281     qsort(flatlist, l->num_records, sizeof(*flatlist), reclist_cmp);
282     for (i = 0; i < l->num_records; i++)
283     {
284         *prev = flatlist[i];
285         prev = &flatlist[i]->sorted_next;
286     }
287     *prev = 0;
288
289     xfree(flatlist);
290
291     reclist_leave(l);
292 }
293
294 struct record_cluster *reclist_read_record(struct reclist *l)
295 {
296     if (l && l->sorted_ptr)
297     {
298         struct record_cluster *t = l->sorted_ptr->record;
299         l->sorted_ptr = l->sorted_ptr->sorted_next;
300         return t;
301     }
302     else
303         return 0;
304 }
305
306 void reclist_enter(struct reclist *l)
307 {
308     yaz_mutex_enter(l->mutex);
309     if (l)
310         l->sorted_ptr = l->sorted_list;
311 }
312
313
314 void reclist_leave(struct reclist *l)
315 {
316     yaz_mutex_leave(l->mutex);
317     if (l)
318         l->sorted_ptr = l->sorted_list;
319 }
320
321
322 struct reclist *reclist_create(NMEM nmem)
323 {
324     struct reclist *res = nmem_malloc(nmem, sizeof(struct reclist));
325     res->hash_size = 399;
326     res->hashtable
327         = nmem_malloc(nmem, res->hash_size * sizeof(struct reclist_bucket*));
328     memset(res->hashtable, 0, res->hash_size * sizeof(struct reclist_bucket*));
329     res->nmem = nmem;
330
331     res->sorted_ptr = 0;
332     res->sorted_list = 0;
333
334     res->num_records = 0;
335     res->mutex = 0;
336     pazpar2_mutex_create(&res->mutex, "reclist");
337     return res;
338 }
339
340 void reclist_destroy(struct reclist *l)
341 {
342     if (l)
343     {
344         unsigned i;
345         for (i = 0; i < l->hash_size; i++)
346         {
347             struct reclist_bucket *p;
348             for (p = l->hashtable[i]; p; p = p->hash_next)
349             {
350                 wrbuf_destroy(p->record->relevance_explain1);
351                 wrbuf_destroy(p->record->relevance_explain2);
352                 p->record->relevance_explain1 = 0;
353                 p->record->relevance_explain2 = 0;
354             }
355         }
356         yaz_mutex_destroy(&l->mutex);
357     }
358 }
359
360 int reclist_get_num_records(struct reclist *l)
361 {
362     if (l)
363         return l->num_records;
364     return 0;
365 }
366
367 static void merge_cluster(struct reclist *l,
368                           struct relevance *r,
369                           struct record_cluster *dst,
370                           struct record_cluster *src)
371 {
372     struct record **rp = &dst->records;
373     for (; *rp; rp = &(*rp)->next)
374         ;
375     *rp = src->records;
376
377     /* not merging metadata and sortkeys yet */
378
379     relevance_mergerec(r, dst, src);
380
381     wrbuf_puts(dst->relevance_explain1, wrbuf_cstr(src->relevance_explain1));
382     wrbuf_puts(dst->relevance_explain2, wrbuf_cstr(src->relevance_explain2));
383
384     wrbuf_destroy(src->relevance_explain1);
385     src->relevance_explain1 = 0;
386     wrbuf_destroy(src->relevance_explain2);
387     src->relevance_explain2 = 0;
388
389     append_merge_keys(&dst->merge_keys, src->merge_keys, l->nmem);
390 }
391
392 static struct record_cluster *new_cluster(
393     struct reclist *l,
394     struct relevance *r,
395     struct conf_service *service,
396     struct record *record,
397     struct record_metadata_attr *merge_keys
398     )
399 {
400     struct record_cluster *cluster;
401     cluster = nmem_malloc(l->nmem, sizeof(*cluster));
402
403     record->next = 0;
404     cluster->records = record;
405     cluster->merge_keys = 0;
406     append_merge_keys(&cluster->merge_keys, merge_keys, l->nmem);
407     cluster->relevance_score = 0;
408     cluster->recid = cluster->merge_keys->value;
409     cluster->metadata =
410         nmem_malloc(l->nmem,
411                     sizeof(struct record_metadata*) * service->num_metadata);
412     memset(cluster->metadata, 0,
413            sizeof(struct record_metadata*) * service->num_metadata);
414     cluster->sortkeys =
415         nmem_malloc(l->nmem, sizeof(struct record_metadata*) * service->num_sortkeys);
416     memset(cluster->sortkeys, 0,
417            sizeof(union data_types*) * service->num_sortkeys);
418     relevance_newrec(r, cluster);
419     cluster->relevance_explain1 = wrbuf_alloc();
420     cluster->relevance_explain2 = wrbuf_alloc();
421     /* attach to hash list */
422     l->num_records++;
423     l->sorted_list = l->sorted_ptr = 0;
424     return cluster;
425 }
426
427 // Insert a record. Return record cluster (newly formed or pre-existing)
428 struct record_cluster *reclist_insert(struct reclist *l,
429                                       struct relevance *r,
430                                       struct conf_service *service,
431                                       struct record *record,
432                                       struct record_metadata_attr *merge_keys,
433                                       int *total)
434 {
435     struct record_cluster *cluster = 0;
436     struct record_metadata_attr *mkl = merge_keys;
437
438     assert(service);
439     assert(l);
440     assert(record);
441     assert(merge_keys);
442     assert(total);
443
444     yaz_mutex_enter(l->mutex);
445
446     for (; mkl; mkl = mkl->next)
447     {
448         const char *merge_key = mkl->value;
449         unsigned int bucket =
450             jenkins_hash((unsigned char*) merge_key) % l->hash_size;
451         struct reclist_bucket **p;
452         struct reclist_bucket *rb = 0;
453
454         for (p = &l->hashtable[bucket]; *p; p = &(*p)->hash_next)
455         {
456             struct record_metadata_attr *mkr = (*p)->record->merge_keys;
457             for (; mkr; mkr = mkr->next)
458             {
459                 // We found a matching record. Merge them
460                 if (!strcmp(merge_key, mkr->value))
461                 {
462                     struct record **re;
463
464                     rb = *p;
465                     for (re = &rb->record->records; *re; re = &(*re)->next)
466                     {
467                         if ((*re)->client == record->client &&
468                             record_compare(record, *re, service))
469                         {
470                             yaz_mutex_leave(l->mutex);
471                             return 0;
472                         }
473                     }
474
475                     if (!cluster)
476                     {
477                         cluster = rb->record;
478                         *re = record;
479                         record->next = 0;
480                     }
481                     else
482                     {
483                         if (cluster != rb->record)
484                         {
485                             if (!rb->record->records)
486                             {
487                                 ; /* already merged */
488                             }
489                             else
490                             {
491                                 merge_cluster(l, r, cluster, rb->record);
492
493                                 rb->record->records = 0; /* signal merged */
494                             }
495                             /* update the hash table */
496                             rb->record = cluster;
497                         }
498                     }
499                 }
500             }
501         }
502         if (!cluster)
503         {
504             (*total)++;
505             cluster = new_cluster(l, r, service, record, merge_keys);
506         }
507
508         if (!rb)
509         {
510             rb = nmem_malloc(l->nmem, sizeof(*rb));
511             rb->record = cluster;
512             rb->hash_next = 0;
513
514             *p = rb;
515         }
516     }
517     yaz_mutex_leave(l->mutex);
518     return cluster;
519 }
520
521 int reclist_sortparms_cmp(struct reclist_sortparms *sort1, struct reclist_sortparms *sort2)
522 {
523     int rc;
524     if (sort1 == sort2)
525         return 0;
526     if (sort1 == 0 || sort2 == 0)
527         return 1;
528     rc = strcmp(sort1->name, sort2->name) || sort1->increasing != sort2->increasing || sort1->type != sort2->type;
529     return rc;
530 }
531 /*
532  * Local variables:
533  * c-basic-offset: 4
534  * c-file-style: "Stroustrup"
535  * indent-tabs-mode: nil
536  * End:
537  * vim: shiftwidth=4 tabstop=8 expandtab
538  */
539