From 0035afa7de3e06c18bf2f559649f34913114ab46 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Fri, 16 Sep 1994 15:39:10 +0000 Subject: [PATCH] Initial code of lookup - not tested yet. --- dict/Makefile | 15 ++++-- dict/dictext.c | 90 ++++++++++++++++++++++++++++++++++++ dict/dicttest.c | 26 +++++++---- dict/insert.c | 137 +++++-------------------------------------------------- dict/lookup.c | 75 ++++++++++++++++++++++++++++-- include/dict.h | 7 ++- 6 files changed, 206 insertions(+), 144 deletions(-) create mode 100644 dict/dictext.c diff --git a/dict/Makefile b/dict/Makefile index df2c5c7..7e28f7d 100644 --- a/dict/Makefile +++ b/dict/Makefile @@ -1,11 +1,12 @@ # Copyright (C) 1994, Index Data I/S # All rights reserved. # Sebastian Hammer, Adam Dickmeiss -# $Id: Makefile,v 1.6 1994-09-12 08:06:41 adam Exp $ +# $Id: Makefile,v 1.7 1994-09-16 15:39:10 adam Exp $ SHELL=/bin/sh INCLUDE=-I../include -TPROG=dicttest +TPROG1=dicttest +TPROG2=dictext CFLAGS=-O -Wall -g -pedantic DEFS=$(INCLUDE) LIB=../lib/dict.a @@ -14,8 +15,12 @@ CPP=cc -E all: $(LIB) -$(TPROG): $(TPROG).o $(LIB) - $(CC) $(CFLAGS) -o $(TPROG) $(TPROG).o $(LIB) ../lib/bfile.a ../lib/util.a +$(TPROG1): $(TPROG1).o $(LIB) + $(CC) $(CFLAGS) -o $(TPROG1) $(TPROG1).o $(LIB) ../lib/bfile.a ../lib/util.a + +$(TPROG2): $(TPROG2).o $(LIB) + $(CC) $(CFLAGS) -o $(TPROG2) $(TPROG2).o ../lib/util.a + $(LIB): $(PO) rm -f $(LIB) @@ -26,7 +31,7 @@ $(LIB): $(PO) $(CC) -c $(DEFS) $(CFLAGS) $< clean: - rm -f *.[oa] $(TPROG) core mon.out gmon.out errlist + rm -f *.[oa] $(TPROG1) $(TPROG2) core mon.out gmon.out errlist dep depend: $(CPP) $(INCLUDE) -M *.c >.depend diff --git a/dict/dictext.c b/dict/dictext.c new file mode 100644 index 0000000..cec1845 --- /dev/null +++ b/dict/dictext.c @@ -0,0 +1,90 @@ +/* + * Copyright (C) 1994, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: dictext.c,v $ + * Revision 1.1 1994-09-16 15:39:11 adam + * Initial code of lookup - not tested yet. + * + */ + +#include +#include +#include +#include +#include + +#include + +char *prog; + +int main (int argc, char **argv) +{ + char *arg; + int ret; + char *inputfile = NULL; + FILE *ipf = stdin; + char ipf_buf[1024]; + + prog = *argv; + while ((ret = options ("vh", argv, argc, &arg)) != -2) + { + if (ret == 0) + { + if (!inputfile) + inputfile = arg; + else + { + log (LOG_FATAL, "too many files specified\n"); + exit (1); + } + } + else if (ret == 'v') + { + log_init (atoi(arg), prog, NULL); + } + else if (ret == 'h') + { + fprintf (stderr, "usage:\n" + " %s [-h] [-v n] [file]\n", prog); + exit (1); + } + else + { + log (LOG_FATAL, "unknown option"); + exit (1); + } + } + if (inputfile) + { + ipf = fopen (inputfile, "r"); + if (!ipf) + { + log (LOG_FATAL|LOG_ERRNO, "cannot open '%s'", inputfile); + exit (1); + } + } + while (fgets (ipf_buf, 1023, ipf)) + { + char *ipf_ptr = ipf_buf; + for (;*ipf_ptr && *ipf_ptr != '\n';ipf_ptr++) + { + if (isalpha(*ipf_ptr) || *ipf_ptr == '_') + { + int i = 1; + while (ipf_ptr[i] && (isalnum(ipf_ptr[i]) || + ipf_ptr[i] == '_')) + i++; + if (ipf_ptr[i]) + ipf_ptr[i++] = '\0'; + printf ("%s\n", ipf_ptr); + ipf_ptr += (i-1); + } + } + } + return 0; +} + + + diff --git a/dict/dicttest.c b/dict/dicttest.c index 52dadb1..1451548 100644 --- a/dict/dicttest.c +++ b/dict/dicttest.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: dicttest.c,v $ - * Revision 1.5 1994-09-06 13:05:14 adam + * Revision 1.6 1994-09-16 15:39:12 adam + * Initial code of lookup - not tested yet. + * + * Revision 1.5 1994/09/06 13:05:14 adam * Further development of insertion. Some special cases are * not properly handled yet! assert(0) are put here. The * binary search in each page definitely reduce usr CPU. @@ -44,17 +47,18 @@ int main (int argc, char **argv) int ret; int no_of_insertions = 0; int no_of_new = 0, no_of_same = 0, no_of_change = 0; + int unique = 0; char *arg; prog = argv[0]; if (argc < 2) { fprintf (stderr, "usage:\n" - " %s [-s n] [-v n] [-i f] [-w] [-c n] base file\n", + " %s [-u] [-s n] [-v n] [-i f] [-w] [-c n] base file\n", prog); exit (1); } - while ((ret = options ("s:v:i:wc:", argv, argc, &arg)) != -2) + while ((ret = options ("us:v:i:wc:", argv, argc, &arg)) != -2) { if (ret == 0) { @@ -68,6 +72,10 @@ int main (int argc, char **argv) exit (1); } } + else if (ret == 'u') + { + unique = 1; + } else if (ret == 'c') { cache = atoi(arg); @@ -115,7 +123,7 @@ int main (int argc, char **argv) if (inputfile) { FILE *ipf; - char ipf_buf[256]; + char ipf_buf[1024]; int line = 1; char infobytes[120]; memset (infobytes, 0, 120); @@ -126,7 +134,7 @@ int main (int argc, char **argv) exit (1); } - while (fgets (ipf_buf, 255, ipf)) + while (fgets (ipf_buf, 1023, ipf)) { char *ipf_ptr = ipf_buf; sprintf (infobytes, "%d", line); @@ -140,7 +148,6 @@ int main (int argc, char **argv) i++; if (ipf_ptr[i]) ipf_ptr[i++] = '\0'; -#if 1 switch(dict_insert (dict, ipf_ptr, infosize, infobytes)) { case 0: @@ -148,14 +155,15 @@ int main (int argc, char **argv) break; case 1: no_of_change++; + if (unique) + log (LOG_LOG, "%s change\n", ipf_ptr); break; case 2: + if (unique) + log (LOG_LOG, "%s duplicate\n", ipf_ptr); no_of_same++; break; } -#else - printf ("%s\n", ipf_ptr); -#endif ++no_of_insertions; ipf_ptr += (i-1); } diff --git a/dict/insert.c b/dict/insert.c index 9e9cf72..ff99b66 100644 --- a/dict/insert.c +++ b/dict/insert.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: insert.c,v $ - * Revision 1.8 1994-09-16 12:35:01 adam + * Revision 1.9 1994-09-16 15:39:13 adam + * Initial code of lookup - not tested yet. + * + * Revision 1.8 1994/09/16 12:35:01 adam * New version of split_page which use clean_page for splitting. * * Revision 1.7 1994/09/12 08:06:42 adam @@ -80,12 +83,13 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) void *subp; char *info_here; Dict_ptr subptr; - int i, need; - short *indxp, *best_indxp; + int i; + short *indxp, *best_indxp = NULL; Dict_char best_char = 0; Dict_char prev_char = 0; int best_no = -1, no_current = 1; + /* determine splitting char... */ indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short)); for (i = DICT_nodir (p); --i >= 0; --indxp) { @@ -124,7 +128,7 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) info_here = NULL; for (indxp=best_indxp, i=0; i 0); @@ -139,112 +143,16 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) assert (!info_here); info_here = info+(slen+1)*sizeof(Dict_char); } - } - /* calculate the amount of bytes needed for this entry when */ - /* transformed to a sub entry */ - need = sizeof(Dict_char)+sizeof(Dict_ptr)+1; - if (info_here) - need += *info_here; - - indxp = best_indxp; - /* now loop on all entries with string length > 1 i.e. all */ - /* those entries which contribute to a sub page */ - best_indxp = NULL; - for (i=0; i 0); - - info = (char*) p + *indxp; /* entry start */ - assert (*info == best_char); - slen = dict_strlen(info); - - if (slen > 1) + else { info1 = info+(1+slen)*sizeof(Dict_char); /* info start */ - - if (need <= (1+slen)*sizeof(Dict_char) + 1 + *info1) - best_indxp = indxp; /* space for entry */ dict_ins (dict, info+sizeof(Dict_char), subptr, *info1, info1+1); dict_bf_readp (dict->dbf, ptr, &p); } } -#if 1 /* now clean the page ... */ clean_page (dict, ptr, p, &best_char, subptr, info_here); return 0; -#endif - if (best_indxp) - { /* there was a hole big enough for a sub entry */ - char *info = (char*) p + *best_indxp; - short *indxp1; - - *--indxp = - *best_indxp; - DICT_type(p) = 1; - DICT_nodir (p) -= (best_no-1); - indxp1 = (short*)((char*)p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short)); - while (indxp != indxp1) - { - --indxp; - *indxp = indxp[1-best_no]; - } - memcpy (info, &subptr, sizeof(Dict_ptr)); /* store subptr */ - info += sizeof(Dict_ptr); - memcpy (info, &best_char, sizeof(Dict_char)); /* store sub char */ - info += sizeof(Dict_char); - if (info_here) - memcpy (info, info_here, *info_here+1); /* with information */ - else - *info = 0; /* without info */ -#if CHECK - best_indxp = NULL; - prev_char = 0; - indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short)); - for (i = DICT_nodir (p); --i >= 0; --indxp) - { - if (*indxp > 0) /* tail string here! */ - { - Dict_char dc; - - memcpy (&dc, (char*) p + *indxp, sizeof(dc)); - assert (dc != best_char); - assert (dc >= prev_char); - prev_char = dc; - } - else - { - Dict_char dc; - memcpy (&dc, (char*)p - *indxp+sizeof(Dict_ptr), - sizeof(dc)); - assert (dc > prev_char); - if (dc == best_char) - { - assert (best_indxp == NULL); - best_indxp = indxp; - } - prev_char = dc; - } - } - assert (best_indxp); -#endif - } - else - { - short *indxp1, *indxp2; - assert (0); - DICT_type(p) = 1; - DICT_nodir(p) -= best_no; - indxp2 = indxp; - indxp1 = (short*)((char*) p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short)); - do - { - --indxp2; - indxp2[0] = indxp2[-best_no]; - } while (indxp2 != indxp1); - } - return 0; } static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, @@ -318,6 +226,7 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, DICT_type(p) = 0; DICT_nodir(p) = no; xfree (np); + dict_bf_touch (dict->dbf, ptr); } @@ -329,7 +238,7 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, static int dict_ins (Dict dict, const Dict_char *str, Dict_ptr back_ptr, int userlen, void *userinfo) { - int hi, lo, mid, i, slen, cmp = 1; + int hi, lo, mid, slen, cmp = 1; Dict_ptr ptr = back_ptr; short *indxp; char *info; @@ -429,7 +338,6 @@ static int dict_ins (Dict dict, const Dict_char *str, if (DICT_type(p) == 1) { clean_page (dict, ptr, p, NULL, 0, NULL); - dict_bf_touch (dict->dbf, ptr); return dict_ins (dict, str-1, ptr, userlen, userinfo); } @@ -482,28 +390,7 @@ static int dict_ins (Dict dict, const Dict_char *str, if (DICT_size(p)+slen+userlen >= DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */ { - if (DICT_type(p) == 1) - { - clean_page (dict, ptr, p, NULL, 0, NULL); - dict_bf_touch (dict->dbf, ptr); - return dict_ins (dict, str, ptr, userlen, userinfo); - } - i = 0; - do - { - assert (i <= 1); - if (split_page (dict, ptr, p)) - { - log (LOG_FATAL, "Unable to split page %d\n", ptr); - abort (); - } - if (DICT_size(p)+slen+userlen < - DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short)) - break; - i++; - clean_page (dict, ptr, p, NULL, 0, NULL); - } while (DICT_size(p)+slen+userlen > DICT_PAGESIZE - - (1+DICT_nodir(p))*sizeof(short)); + split_page (dict, ptr, p); return dict_ins (dict, str, ptr, userlen, userinfo); } if (cmp) diff --git a/dict/lookup.c b/dict/lookup.c index 0cb2260..3673e24 100644 --- a/dict/lookup.c +++ b/dict/lookup.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: lookup.c,v $ - * Revision 1.1 1994-08-16 16:26:48 adam + * Revision 1.2 1994-09-16 15:39:14 adam + * Initial code of lookup - not tested yet. + * + * Revision 1.1 1994/08/16 16:26:48 adam * Added dict. * */ @@ -16,9 +19,75 @@ #include -int dict_lookup (Dict dict, Dict_char *p) +static char *dict_look (Dict dict, Dict_char *str) +{ + Dict_ptr ptr = 1; + int mid, lo, hi; + int cmp; + void *p; + short *indxp; + char *info; + + dict_bf_readp (dict->dbf, ptr, &p); + mid = lo = 0; + hi = DICT_nodir(p)-1; + indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short)); + while (lo <= hi) + { + mid = (lo+hi)/2; + if (indxp[-mid] > 0) + { + /* string (Dict_char *) DICT_EOS terminated */ + /* unsigned char length of information */ + /* char * information */ + info = (char*)p + indxp[-mid]; + cmp = dict_strcmp((Dict_char*) info, str); + if (!cmp) + return info+(dict_strlen (info)+1)*sizeof(Dict_char); + } + else + { + Dict_char dc; + Dict_ptr subptr; + + /* Dict_ptr subptr */ + /* Dict_char sub char */ + /* unsigned char length of information */ + /* char * information */ + info = (char*)p - indxp[-mid]; + memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char)); + cmp = dc- *str; + if (!cmp) + { + memcpy (&subptr, info, sizeof(Dict_ptr)); + if (*++str == DICT_EOS) + return info+sizeof(Dict_ptr)+sizeof(Dict_char); + else + { + if (subptr == 0) + return NULL; + ptr = subptr; + dict_bf_readp (dict->dbf, ptr, &p); + mid = lo = 0; + hi = DICT_nodir(p)-1; + indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short)); + continue; + } + } + } + if (cmp < 0) + lo = mid+1; + else + hi = mid-1; + } + return NULL; +} + +char *dict_lookup (Dict dict, Dict_char *p) { - return 0; + if (dict->head.last == 1) + return NULL; + return dict_look (dict, p); } diff --git a/include/dict.h b/include/dict.h index b5347bf..2d9a8ed 100644 --- a/include/dict.h +++ b/include/dict.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: dict.h,v $ - * Revision 1.5 1994-09-06 13:05:29 adam + * Revision 1.6 1994-09-16 15:39:21 adam + * Initial code of lookup - not tested yet. + * + * Revision 1.5 1994/09/06 13:05:29 adam * Further development of insertion. Some special cases are * not properly handled yet! assert(0) are put here. The * binary search in each page definitely reduce usr CPU. @@ -84,7 +87,7 @@ int dict_bf_close (Dict_BFile dbf); Dict dict_open (const char *name, int cache, int rw); int dict_close (Dict dict); int dict_insert (Dict dict, const Dict_char *p, int userlen, void *userinfo); -int dict_lookup (Dict dict, Dict_char *p); +char *dict_lookup (Dict dict, Dict_char *p); int dict_strcmp (const Dict_char *s1, const Dict_char *s2); int dict_strlen (const Dict_char *s); -- 1.7.10.4