Updated dictionary. Implemented "compression" of dictionary.
[idzebra-moved-to-github.git] / dict / scan.c
1 /*
2  * Copyright (C) 1994-1999, Index Data
3  * All rights reserved.
4  * Sebastian Hammer, Adam Dickmeiss
5  *
6  * $Log: scan.c,v $
7  * Revision 1.13  1999-05-15 14:36:37  adam
8  * Updated dictionary. Implemented "compression" of dictionary.
9  *
10  * Revision 1.12  1999/02/02 14:50:28  adam
11  * Updated WIN32 code specific sections. Changed header.
12  *
13  * Revision 1.11  1998/06/22 11:34:45  adam
14  * Changed scan callback function so it doesn't stop further scanning.
15  *
16  * Revision 1.10  1998/03/06 16:58:04  adam
17  * Fixed bug which related to scanning of large indexes.
18  *
19  * Revision 1.9  1997/10/27 14:33:04  adam
20  * Moved towards generic character mapping depending on "structure"
21  * field in abstract syntax file. Fixed a few memory leaks. Fixed
22  * bug with negative integers when doing searches with relational
23  * operators.
24  *
25  * Revision 1.8  1996/02/02 13:43:52  adam
26  * The public functions simply use char instead of Dict_char to represent
27  * search strings. Dict_char is used internally only.
28  *
29  * Revision 1.7  1995/12/11  09:04:50  adam
30  * Bug fix: the lookup/scan/lookgrep didn't handle empty dictionary.
31  *
32  * Revision 1.6  1995/11/20  11:58:04  adam
33  * Support for YAZ in standard located directories, such as /usr/local/..
34  *
35  * Revision 1.5  1995/10/09  16:18:32  adam
36  * Function dict_lookup_grep got extra client data parameter.
37  *
38  * Revision 1.4  1995/10/06  13:52:00  adam
39  * Bug fixes. Handler may abort further scanning.
40  *
41  * Revision 1.3  1995/10/06  11:06:07  adam
42  * Bug fixes.
43  *
44  * Revision 1.2  1995/10/06  10:43:16  adam
45  * Minor changes.
46  *
47  * Revision 1.1  1995/10/06  09:04:18  adam
48  * First version of scan.
49  *
50  */
51 #include <stdlib.h>
52 #include <string.h>
53 #include <stdio.h>
54 #include <assert.h>
55
56 #include <dict.h>
57
58 int dict_scan_trav (Dict dict, Dict_ptr ptr, int pos, Dict_char *str, 
59                     int start, int *count, void *client,
60                     int (*userfunc)(char *, const char *, int, void *),
61                     int dir)
62 {
63     int lo, hi, j;
64     void *p;
65     short *indxp;
66     char *info;
67
68     dict_bf_readp (dict->dbf, ptr, &p);
69     hi = DICT_nodir(p)-1;
70     if (start == 0 && dir == -1)
71         lo = hi;
72     else
73         lo = start;
74     indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); 
75
76     while (lo <= hi && lo >= 0 && *count > 0)
77     {
78         if (indxp[-lo] > 0)
79         {
80             /* string (Dict_char *) DICT_EOS terminated */
81             /* unsigned char        length of information */
82             /* char *               information */
83
84             info = (char*)p + indxp[-lo];
85             for (j = 0; info[j] != DICT_EOS; j++)
86                 str[pos+j] = info[j];
87             str[pos+j] = DICT_EOS;
88             (*userfunc)((char*) str, info+(j+1)*sizeof(Dict_char),
89                             *count * dir, client);
90             --(*count);
91         }
92         else
93         {
94             Dict_char dc;
95             Dict_ptr subptr;
96
97             /* Dict_ptr             subptr */
98             /* Dict_char            sub char */
99             /* unsigned char        length of information */
100             /* char *               information */
101
102             info = (char*)p - indxp[-lo];
103             memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
104             str[pos] = dc;
105             memcpy (&subptr, info, sizeof(Dict_ptr));
106             if (info[sizeof(Dict_ptr)+sizeof(Dict_char)])
107             {
108                  str[pos+1] = DICT_EOS;
109                  if ((*userfunc)((char*) str,
110                                  info+sizeof(Dict_ptr)+sizeof(Dict_char),
111                                  *count * dir, client))
112                      return 1;
113                  --(*count);
114             }
115             if (*count > 0 && subptr)
116             {
117                 dict_scan_trav (dict, subptr, pos+1, str, 0, count, 
118                                 client, userfunc, dir);
119                 dict_bf_readp (dict->dbf, ptr, &p);
120                 indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); 
121             }
122         }
123         lo += dir;
124     }
125     return 0;
126 }
127
128 int dict_scan_r (Dict dict, Dict_ptr ptr, int pos, Dict_char *str, 
129                  int *before, int *after, void *client,
130                  int (*userfunc)(char *, const char *, int, void *))
131 {
132     int cmp = 0, mid, lo, hi;
133     void *p;
134     short *indxp;
135     char *info;
136
137     dict_bf_readp (dict->dbf, ptr, &p);
138     if (!p)
139         return 0;
140     mid = lo = 0;
141     hi = DICT_nodir(p)-1;
142     indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
143     while (lo <= hi)
144     {
145         mid = (lo+hi)/2;
146         if (indxp[-mid] > 0)
147         {
148             /* string (Dict_char *) DICT_EOS terminated */
149             /* unsigned char        length of information */
150             /* char *               information */
151             info = (char*)p + indxp[-mid];
152             cmp = dict_strcmp ((Dict_char*) info, str + pos);
153             if (!cmp)
154             {
155                 if (*after)
156                 {
157                     (*userfunc)((char *) str, info+
158                                 (dict_strlen((Dict_char*) info)+1)
159                                 *sizeof(Dict_char), 
160                                 *after, client);
161                     --(*after);
162                 }
163                 break;
164             }
165         }
166         else
167         {
168             Dict_char dc;
169             Dict_ptr subptr;
170
171             /* Dict_ptr             subptr */
172             /* Dict_char            sub char */
173             /* unsigned char        length of information */
174             /* char *               information */
175             info = (char*)p - indxp[-mid];
176             memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
177             cmp = dc - str[pos];
178             if (!cmp)
179             {
180                 memcpy (&subptr, info, sizeof(Dict_ptr));
181                 if (str[pos+1] == DICT_EOS)
182                 {
183                     if (info[sizeof(Dict_ptr)+sizeof(Dict_char)])
184                     {
185                         if (*after)
186                         {
187                             (*userfunc)((char*) str,
188                                         info+sizeof(Dict_ptr)+
189                                         sizeof(Dict_char),
190                                         *after, client);
191                             --(*after);
192                         }
193                     }
194                     if (*after && subptr)
195                         if (dict_scan_trav (dict, subptr, pos+1, str, 0, 
196                                             after, client, userfunc, 1))
197                             return 1;
198                 }
199                 else if (subptr)
200                 {
201                     if (dict_scan_r (dict, subptr, pos+1, str, before, after,
202                                      client, userfunc))
203                         return 1;
204                 }
205                 break;
206             }
207         }
208         if (cmp < 0)
209             lo = mid+1;
210         else
211             hi = mid-1;
212     }
213     if (lo>hi && cmp < 0)
214         ++mid;
215     if (*after)
216         if (dict_scan_trav (dict, ptr, pos, str, cmp ? mid : mid+1, after,
217                             client, userfunc, 1))
218             return 1;
219     if (*before && mid > 1)
220         if (dict_scan_trav (dict, ptr, pos, str, mid-1, before, 
221                             client, userfunc, -1))
222             return 1;
223     return 0;
224 }
225
226 int dict_scan (Dict dict, char *str, int *before, int *after, void *client,
227                int (*f)(char *name, const char *info, int pos, void *client))
228 {
229     int i;
230
231     logf (LOG_DEBUG, "dict_scan");
232     for (i = 0; str[i]; i++)
233     {
234         logf (LOG_DEBUG, " %3d  %c", str[i],
235               (str[i] > ' ' && str[i] < 127) ? str[i] : '?');
236     }
237     if (!dict->head.root)
238         return 0;
239     return dict_scan_r (dict, dict->head.root, 0, (Dict_char *) str,
240                         before, after, client, f);
241 }