Fixed bug which related to scanning of large indexes.
[idzebra-moved-to-github.git] / dict / scan.c
1 /*
2  * Copyright (C) 1994-1998, Index Data I/S 
3  * All rights reserved.
4  * Sebastian Hammer, Adam Dickmeiss
5  *
6  * $Log: scan.c,v $
7  * Revision 1.10  1998-03-06 16:58:04  adam
8  * Fixed bug which related to scanning of large indexes.
9  *
10  * Revision 1.9  1997/10/27 14:33:04  adam
11  * Moved towards generic character mapping depending on "structure"
12  * field in abstract syntax file. Fixed a few memory leaks. Fixed
13  * bug with negative integers when doing searches with relational
14  * operators.
15  *
16  * Revision 1.8  1996/02/02 13:43:52  adam
17  * The public functions simply use char instead of Dict_char to represent
18  * search strings. Dict_char is used internally only.
19  *
20  * Revision 1.7  1995/12/11  09:04:50  adam
21  * Bug fix: the lookup/scan/lookgrep didn't handle empty dictionary.
22  *
23  * Revision 1.6  1995/11/20  11:58:04  adam
24  * Support for YAZ in standard located directories, such as /usr/local/..
25  *
26  * Revision 1.5  1995/10/09  16:18:32  adam
27  * Function dict_lookup_grep got extra client data parameter.
28  *
29  * Revision 1.4  1995/10/06  13:52:00  adam
30  * Bug fixes. Handler may abort further scanning.
31  *
32  * Revision 1.3  1995/10/06  11:06:07  adam
33  * Bug fixes.
34  *
35  * Revision 1.2  1995/10/06  10:43:16  adam
36  * Minor changes.
37  *
38  * Revision 1.1  1995/10/06  09:04:18  adam
39  * First version of scan.
40  *
41  */
42 #include <stdlib.h>
43 #include <string.h>
44 #include <stdio.h>
45 #include <assert.h>
46
47 #include <dict.h>
48
49 int dict_scan_trav (Dict dict, Dict_ptr ptr, int pos, Dict_char *str, 
50                     int start, int *count, void *client,
51                     int (*userfunc)(char *, const char *, int, void *),
52                     int dir)
53 {
54     int lo, hi, j;
55     void *p;
56     short *indxp;
57     char *info;
58
59     dict_bf_readp (dict->dbf, ptr, &p);
60     hi = DICT_nodir(p)-1;
61     if (start == 0 && dir == -1)
62         lo = hi;
63     else
64         lo = start;
65     indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); 
66
67     while (lo <= hi && lo >= 0 && *count > 0)
68     {
69         if (indxp[-lo] > 0)
70         {
71             /* string (Dict_char *) DICT_EOS terminated */
72             /* unsigned char        length of information */
73             /* char *               information */
74
75             info = (char*)p + indxp[-lo];
76             for (j = 0; info[j] != DICT_EOS; j++)
77                 str[pos+j] = info[j];
78             str[pos+j] = DICT_EOS;
79             if ((*userfunc)((char*) str, info+(j+1)*sizeof(Dict_char),
80                             *count * dir, client))
81                 return 1;
82             --(*count);
83         }
84         else
85         {
86             Dict_char dc;
87             Dict_ptr subptr;
88
89             /* Dict_ptr             subptr */
90             /* Dict_char            sub char */
91             /* unsigned char        length of information */
92             /* char *               information */
93
94             info = (char*)p - indxp[-lo];
95             memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
96             str[pos] = dc;
97             memcpy (&subptr, info, sizeof(Dict_ptr));
98             if (info[sizeof(Dict_ptr)+sizeof(Dict_char)])
99             {
100                  str[pos+1] = DICT_EOS;
101                  if ((*userfunc)((char*) str,
102                                  info+sizeof(Dict_ptr)+sizeof(Dict_char),
103                                  *count * dir, client))
104                      return 1;
105                  --(*count);
106             }
107             if (*count > 0 && subptr)
108             {
109                 dict_scan_trav (dict, subptr, pos+1, str, 0, count, 
110                                 client, userfunc, dir);
111                 dict_bf_readp (dict->dbf, ptr, &p);
112                 indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); 
113             }
114         }
115         lo += dir;
116     }
117     return 0;
118 }
119
120 int dict_scan_r (Dict dict, Dict_ptr ptr, int pos, Dict_char *str, 
121                  int *before, int *after, void *client,
122                  int (*userfunc)(char *, const char *, int, void *))
123 {
124     int cmp = 0, mid, lo, hi;
125     void *p;
126     short *indxp;
127     char *info;
128
129     dict_bf_readp (dict->dbf, ptr, &p);
130     if (!p)
131         return 0;
132     mid = lo = 0;
133     hi = DICT_nodir(p)-1;
134     indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));    
135     while (lo <= hi)
136     {
137         mid = (lo+hi)/2;
138         if (indxp[-mid] > 0)
139         {
140             /* string (Dict_char *) DICT_EOS terminated */
141             /* unsigned char        length of information */
142             /* char *               information */
143             info = (char*)p + indxp[-mid];
144             cmp = dict_strcmp ((Dict_char*) info, str + pos);
145             if (!cmp)
146             {
147                 if (*after)
148                 {
149                     (*userfunc)((char *) str, info+
150                                 (dict_strlen((Dict_char*) info)+1)
151                                 *sizeof(Dict_char), 
152                                 *after, client);
153                     --(*after);
154                 }
155                 break;
156             }
157         }
158         else
159         {
160             Dict_char dc;
161             Dict_ptr subptr;
162
163             /* Dict_ptr             subptr */
164             /* Dict_char            sub char */
165             /* unsigned char        length of information */
166             /* char *               information */
167             info = (char*)p - indxp[-mid];
168             memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
169             cmp = dc - str[pos];
170             if (!cmp)
171             {
172                 memcpy (&subptr, info, sizeof(Dict_ptr));
173                 if (str[pos+1] == DICT_EOS)
174                 {
175                     if (info[sizeof(Dict_ptr)+sizeof(Dict_char)])
176                     {
177                         if (*after)
178                         {
179                             (*userfunc)((char*) str,
180                                         info+sizeof(Dict_ptr)+
181                                         sizeof(Dict_char),
182                                         *after, client);
183                             --(*after);
184                         }
185                     }
186                     if (*after && subptr)
187                         if (dict_scan_trav (dict, subptr, pos+1, str, 0, 
188                                             after, client, userfunc, 1))
189                             return 1;
190                 }
191                 else if (subptr)
192                 {
193                     if (dict_scan_r (dict, subptr, pos+1, str, before, after,
194                                      client, userfunc))
195                         return 1;
196                 }
197                 break;
198             }
199         }
200         if (cmp < 0)
201             lo = mid+1;
202         else
203             hi = mid-1;
204     }
205     if (lo>hi && cmp < 0)
206         ++mid;
207     if (*after)
208         if (dict_scan_trav (dict, ptr, pos, str, cmp ? mid : mid+1, after,
209                             client, userfunc, 1))
210             return 1;
211     if (*before && mid > 1)
212         if (dict_scan_trav (dict, ptr, pos, str, mid-1, before, 
213                             client, userfunc, -1))
214             return 1;
215     return 0;
216 }
217
218 int dict_scan (Dict dict, char *str, int *before, int *after, void *client,
219                int (*f)(char *name, const char *info, int pos, void *client))
220 {
221     int i;
222
223     logf (LOG_DEBUG, "dict_scan");
224     for (i = 0; str[i]; i++)
225     {
226         logf (LOG_DEBUG, " %3d  %c", str[i],
227               (str[i] > ' ' && str[i] < 127) ? str[i] : '?');
228     }
229     if (dict->head.last <= 1)
230         return 0;
231     return dict_scan_r (dict, 1, 0, (Dict_char *) str, before, after, client,
232                         f);
233 }
234