From: Adam Dickmeiss Date: Tue, 5 Dec 2000 09:59:10 +0000 (+0000) Subject: Work on dict_delete_subtree. X-Git-Tag: ZEBRA.1.1~46 X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=commitdiff_plain;h=3b7b0e72cc359c7df837fe7ff1252f2efecd41c7 Work on dict_delete_subtree. --- diff --git a/dict/delete.c b/dict/delete.c index 3a9912f..8b217b1 100644 --- a/dict/delete.c +++ b/dict/delete.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2000, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: delete.c,v $ - * Revision 1.6 1999-05-15 14:36:37 adam + * Revision 1.7 2000-12-05 09:59:10 adam + * Work on dict_delete_subtree. + * + * Revision 1.6 1999/05/15 14:36:37 adam * Updated dictionary. Implemented "compression" of dictionary. * * Revision 1.5 1999/02/02 14:50:17 adam @@ -33,7 +36,71 @@ #include -static int dict_del (Dict dict, const Dict_char *str, Dict_ptr ptr) +static void dict_del_subtree (Dict dict, Dict_ptr ptr, + void *client, + int (*f)(const char *, void *)) +{ + int more = 1; + void *p = 0; + + if (!ptr) + return; + + while (more) + { + short *indxp; + int i, hi; + + dict_bf_readp (dict->dbf, ptr, &p); + hi = DICT_nodir(p)-1; + indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short)); + more = 0; + for (i = 0; i <= hi; i++) + { + if (indxp[-i] > 0) + { + /* string (Dict_char *) DICT_EOS terminated */ + /* unsigned char length of information */ + /* char * information */ + char *info = (char*)p + indxp[-i]; + if (f) + (*f)(info + (dict_strlen((Dict_char*) info)+1) + *sizeof(Dict_char), client); + } + else + { + + Dict_ptr subptr; + + /* Dict_ptr subptr */ + /* Dict_char sub char */ + /* unsigned char length of information */ + /* char * information */ + char *info = (char*)p - indxp[-i]; + memcpy (&subptr, info, sizeof(Dict_ptr)); + + if (info[sizeof(Dict_ptr)+sizeof(Dict_char)]) + { + if (f) + (*f)(info+sizeof(Dict_ptr)+sizeof(Dict_char), client); + } + if (subptr) + { + dict_del_subtree (dict, subptr, client, f); + more = 1; + break; + } + } + } + } + DICT_backptr(p) = dict->head.freelist; + dict->head.freelist = ptr; + dict_bf_touch (dict->dbf, ptr); +} + +static int dict_del_string (Dict dict, const Dict_char *str, Dict_ptr ptr, + int sub_flag, void *client, + int (*f)(const char *, void *)) { int mid, lo, hi; int cmp; @@ -41,6 +108,8 @@ static int dict_del (Dict dict, const Dict_char *str, Dict_ptr ptr) short *indxp; char *info; + if (!ptr) + return 0; dict_bf_readp (dict->dbf, ptr, &p); mid = lo = 0; hi = DICT_nodir(p)-1; @@ -54,20 +123,49 @@ static int dict_del (Dict dict, const Dict_char *str, Dict_ptr ptr) /* unsigned char length of information */ /* char * information */ info = (char*)p + indxp[-mid]; - cmp = dict_strcmp((Dict_char*) info, str); - if (!cmp) - { - hi = DICT_nodir(p)-1; - while (mid < hi) - { - indxp[-mid] = indxp[-mid-1]; - mid++; - } - DICT_type(p) = 1; - (DICT_nodir(p))--; - dict_bf_touch (dict->dbf, ptr); - return 1; - } + + cmp = dict_strcmp((Dict_char*) info, str); + if (sub_flag) + { + /* determine if prefix match */ + if (!dict_strncmp (str, (Dict_char*) info, dict_strlen(str))) + { + if (f) + (*f)(info + (dict_strlen((Dict_char*) info)+1) + *sizeof(Dict_char), client); + + hi = DICT_nodir(p)-1; + while (mid < hi) + { + indxp[-mid] = indxp[-mid-1]; + mid++; + } + DICT_type(p) = 1; + (DICT_nodir(p))--; + dict_bf_touch (dict->dbf, ptr); + --hi; + mid = lo = 0; + /* start again (may not be the most efficient way to go)*/ + continue; + } + } + else + { + /* normal delete: delete if equal */ + if (!cmp) + { + hi = DICT_nodir(p)-1; + while (mid < hi) + { + indxp[-mid] = indxp[-mid-1]; + mid++; + } + DICT_type(p) = 1; + (DICT_nodir(p))--; + dict_bf_touch (dict->dbf, ptr); + return 1; + } + } } else { @@ -86,13 +184,30 @@ static int dict_del (Dict dict, const Dict_char *str, Dict_ptr ptr) memcpy (&subptr, info, sizeof(Dict_ptr)); if (*++str == DICT_EOS) { + if (sub_flag && subptr) + { + Dict null_ptr = 0; + memcpy (info, &null_ptr, sizeof(Dict_ptr)); + } if (info[sizeof(Dict_ptr)+sizeof(Dict_char)]) { info[sizeof(Dict_ptr)+sizeof(Dict_char)] = 0; DICT_type(p) = 1; dict_bf_touch (dict->dbf, ptr); + + if (f) + (*f)(info+sizeof(Dict_ptr)+sizeof(Dict_char), + client); + if (sub_flag && subptr) + dict_del_subtree (dict, subptr, client, f); return 1; } + if (sub_flag && subptr) + { + DICT_type(p) = 1; + dict_bf_touch (dict->dbf, ptr); + dict_del_subtree (dict, subptr, client, f); + } return 0; } else @@ -118,7 +233,13 @@ static int dict_del (Dict dict, const Dict_char *str, Dict_ptr ptr) int dict_delete (Dict dict, const char *p) { - if (!dict->head.root) - return 0; - return dict_del (dict, (const Dict_char*) p, dict->head.root); + return dict_del_string (dict, (const Dict_char*) p, dict->head.root, 0, + 0, 0); +} + +int dict_delete_subtree (Dict dict, const char *p, void *client, + int (*f)(const char *info, void *client)) +{ + return dict_del_string (dict, (const Dict_char*) p, dict->head.root, 1, + client, f); } diff --git a/dict/dicttest.c b/dict/dicttest.c index 28d971c..57f1119 100644 --- a/dict/dicttest.c +++ b/dict/dicttest.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2000, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: dicttest.c,v $ - * Revision 1.24 2000-09-05 14:04:05 adam + * Revision 1.25 2000-12-05 09:59:10 adam + * Work on dict_delete_subtree. + * + * Revision 1.24 2000/09/05 14:04:05 adam * Updates for prefix 'yaz_' for YAZ log functions. * * Revision 1.23 2000/07/07 12:49:20 adam @@ -100,13 +103,19 @@ static Dict dict; static int look_hits; -static int grep_handle (char *name, const char *info, void *client) +static int grep_handler (char *name, const char *info, void *client) { look_hits++; printf ("%s\n", name); return 0; } +static int scan_handler (char *name, const char *info, int pos, void *client) +{ + printf ("%s\n", name); + return 0; +} + int main (int argc, char **argv) { Res my_resource = 0; @@ -114,6 +123,8 @@ int main (int argc, char **argv) const char *name = NULL; const char *inputfile = NULL; const char *config = NULL; + const char *delete_term = NULL; + int scan_the_thing = 0; int do_delete = 0; int range = -1; int srange = 0; @@ -133,10 +144,11 @@ int main (int argc, char **argv) if (argc < 2) { fprintf (stderr, "usage:\n " - " %s [-d] [-r n] [-p n] [-u] [-g pat] [-s n] [-v n] [-i f]" - " [-w] [-c n] config file\n\n", + " %s [-d] [-D t] [-S] [-r n] [-p n] [-u] [-g pat] [-s n] " + "[-v n] [-i f] [-w] [-c n] config file\n\n", prog); fprintf (stderr, " -d delete instead of insert\n"); + fprintf (stderr, " -D t delete subtree instead of insert\n"); fprintf (stderr, " -r n set regular match range\n"); fprintf (stderr, " -p n set regular match start range\n"); fprintf (stderr, " -u report if keys change during insert\n"); @@ -146,9 +158,10 @@ int main (int argc, char **argv) fprintf (stderr, " -i f read file with words\n"); fprintf (stderr, " -w insert/delete instead of lookup\n"); fprintf (stderr, " -c n cache size (number of pages)\n"); + fprintf (stderr, " -S scan the dictionary\n"); exit (1); } - while ((ret = options ("dr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2) + while ((ret = options ("D:Sdr:p:ug:s:v:i:wc:", argv, argc, &arg)) != -2) { if (ret == 0) { @@ -162,6 +175,10 @@ int main (int argc, char **argv) exit (1); } } + else if (ret == 'D') + { + delete_term = arg; + } else if (ret == 'd') do_delete = 1; else if (ret == 'g') @@ -190,6 +207,8 @@ int main (int argc, char **argv) rw = 1; else if (ret == 'i') inputfile = arg; + else if (ret == 'S') + scan_the_thing = 1; else if (ret == 's') { infosize = atoi(arg); @@ -257,7 +276,7 @@ int main (int argc, char **argv) ipf_ptr[i++] = '\0'; if (rw) { - if (do_delete) + if (do_delete) switch (dict_delete (dict, ipf_ptr)) { case 0: @@ -299,7 +318,7 @@ int main (int argc, char **argv) { look_hits = 0; dict_lookup_grep (dict, ipf_ptr, range, NULL, - &max_pos, srange, grep_handle); + &max_pos, srange, grep_handler); if (look_hits) no_of_hits++; else @@ -317,13 +336,18 @@ int main (int argc, char **argv) } fclose (ipf); } + if (rw && delete_term) + { + logf (LOG_LOG, "dict_delete_subtree %s", delete_term); + dict_delete_subtree (dict, delete_term, 0, 0); + } if (grep_pattern) { if (range < 0) range = 0; logf (LOG_LOG, "Grepping '%s'", grep_pattern); dict_lookup_grep (dict, grep_pattern, range, NULL, &max_pos, - srange, grep_handle); + srange, grep_handler); } if (rw) { @@ -345,6 +369,17 @@ int main (int argc, char **argv) logf (LOG_LOG, "No of hits.... %d", no_of_hits); logf (LOG_LOG, "No of misses.. %d", no_of_misses); } + if (scan_the_thing) + { + char term_dict[1024]; + + int before = 1000000; + int after = 1000000; + logf (LOG_LOG, "dict_scan"); + term_dict[0] = 1; + term_dict[1] = 0; + dict_scan (dict, term_dict, &before, &after, 0, scan_handler); + } dict_close (dict); bfs_destroy (bfs); res_close (my_resource); diff --git a/dict/open.c b/dict/open.c index 9597b86..d96f989 100644 --- a/dict/open.c +++ b/dict/open.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: open.c,v $ - * Revision 1.16 1999-05-26 07:49:13 adam + * Revision 1.17 2000-12-05 09:59:10 adam + * Work on dict_delete_subtree. + * + * Revision 1.16 1999/05/26 07:49:13 adam * C++ compilation. * * Revision 1.15 1999/05/15 14:36:37 adam @@ -132,6 +135,11 @@ int dict_strcmp (const Dict_char *s1, const Dict_char *s2) return strcmp ((const char *) s1, (const char *) s2); } +int dict_strncmp (const Dict_char *s1, const Dict_char *s2, size_t n) +{ + return strncmp ((const char *) s1, (const char *) s2, n); +} + int dict_strlen (const Dict_char *s) { return strlen((const char *) s); diff --git a/include/dict.h b/include/dict.h index ca82337..ed1e61f 100644 --- a/include/dict.h +++ b/include/dict.h @@ -1,10 +1,13 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2000, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: dict.h,v $ - * Revision 1.30 1999-11-30 13:48:03 adam + * Revision 1.31 2000-12-05 09:59:10 adam + * Work on dict_delete_subtree. + * + * Revision 1.30 1999/11/30 13:48:03 adam * Improved installation. Updated for inclusion of YAZ header files. * * Revision 1.29 1999/05/15 14:36:37 adam @@ -181,6 +184,8 @@ Dict dict_open (BFiles bfs, const char *name, int cache, int rw, int dict_close (Dict dict); int dict_insert (Dict dict, const char *p, int userlen, void *userinfo); int dict_delete (Dict dict, const char *p); +int dict_delete_subtree (Dict dict, const char *p, void *client, + int (*f)(const char *info, void *client)); char *dict_lookup (Dict dict, const char *p); int dict_lookup_ec (Dict dict, char *p, int range, int (*f)(char *name)); @@ -189,6 +194,7 @@ int dict_lookup_grep (Dict dict, const char *p, int range, void *client, int (*f)(char *name, const char *info, void *client)); int dict_strcmp (const Dict_char *s1, const Dict_char *s2); +int dict_strncmp (const Dict_char *s1, const Dict_char *s2, size_t n); int dict_strlen (const Dict_char *s); int dict_scan (Dict dict, char *str, int *before, int *after, void *client,