From 3c5ad6ec79e41c91b818e9953b08c6217795693d Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 16 Aug 1994 16:26:37 +0000 Subject: [PATCH] Added dict. --- Makefile | 4 +- dict/Makefile | 36 ++++++++++++++ dict/close.c | 27 +++++++++++ dict/dicttest.c | 111 +++++++++++++++++++++++++++++++++++++++++++ dict/insert.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ dict/lookup.c | 24 ++++++++++ dict/open.c | 82 ++++++++++++++++++++++++++++++++ include/dict.h | 67 ++++++++++++++++++++++++++ 8 files changed, 491 insertions(+), 2 deletions(-) create mode 100644 dict/Makefile create mode 100644 dict/close.c create mode 100644 dict/dicttest.c create mode 100644 dict/insert.c create mode 100644 dict/lookup.c create mode 100644 dict/open.c create mode 100644 include/dict.h diff --git a/Makefile b/Makefile index 69c3aa8..2285b75 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ # Copyright (C) 1994, Index Data I/S # All rights reserved. # Sebastian Hammer, Adam Dickmeiss -# $Id: Makefile,v 1.3 1994-08-16 16:21:47 adam Exp $ +# $Id: Makefile,v 1.4 1994-08-16 16:26:37 adam Exp $ -SUBDIR=util bfile +SUBDIR=util bfile dict all: for i in $(SUBDIR); do (cd $$i; make); done diff --git a/dict/Makefile b/dict/Makefile new file mode 100644 index 0000000..a4c366a --- /dev/null +++ b/dict/Makefile @@ -0,0 +1,36 @@ +# Copyright (C) 1994, Index Data I/S +# All rights reserved. +# Sebastian Hammer, Adam Dickmeiss +# $Id: Makefile,v 1.1 1994-08-16 16:26:46 adam Exp $ + +SHELL=/bin/sh +INCLUDE=-I../include +TPROG=dicttest +CFLAGS=-g -Wall +DEFS=$(INCLUDE) +LIB=../lib/dict.a +PO = open.o close.o insert.o lookup.o +CPP=cc -E + +all: $(LIB) + +$(TPROG): $(TPROG).o $(LIB) + $(CC) -o $(TPROG) $(TPROG).o $(LIB) ../lib/bfile.a ../lib/util.a + +$(LIB): $(PO) + rm -f $(LIB) + ar qc $(LIB) $(PO) + ranlib $(LIB) + +.c.o: + $(CC) -c $(DEFS) $(CFLAGS) $< + +clean: + rm -f *.[oa] $(TPROG) core mon.out gmon.out + +dep depend: + $(CPP) $(INCLUDE) -M *.c >.depend + +#ifeq (.depend,$(wildcard .depend)) +include .depend +#endif diff --git a/dict/close.c b/dict/close.c new file mode 100644 index 0000000..3c8cddf --- /dev/null +++ b/dict/close.c @@ -0,0 +1,27 @@ +/* + * Copyright (C) 1994, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: close.c,v $ + * Revision 1.1 1994-08-16 16:26:47 adam + * Added dict. + * + */ + +#include +#include +#include +#include + +#include + +int dict_close (Dict dict) +{ + assert (dict); + + bf_close (dict->bf); + free (dict); + return 0; +} + diff --git a/dict/dicttest.c b/dict/dicttest.c new file mode 100644 index 0000000..86a733d --- /dev/null +++ b/dict/dicttest.c @@ -0,0 +1,111 @@ +/* + * Copyright (C) 1994, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: dicttest.c,v $ + * Revision 1.1 1994-08-16 16:26:47 adam + * Added dict. + * + */ + +#include +#include +#include + +#include +#include + +char *prog; +Dict dict; + +int main (int argc, char **argv) +{ + const char *name = NULL; + const char *inputfile = NULL; + int rw = 0; + int cache = 10; + int ret; + int verbose = 0; + char *arg; + + prog = argv[0]; + if (argc < 2) + { + fprintf (stderr, "usage:\n" + " %s [-v n] [-i f] [-w] [-c n] file\n", prog); + exit (1); + } + while ((ret = options ("v:i:wc:", argv, argc, &arg)) != -2) + { + if (ret == 0) + { + if (name) + { + fprintf (stderr, "%s: too many files specified\n", prog); + exit (1); + } + name = arg; + } + else if (ret == 'c') + { + cache = atoi(arg); + if (cache<2) + cache = 2; + } + else if (ret == 'w') + rw = 1; + else if (ret == 'i') + { + inputfile = arg; + rw = 1; + } + else if (ret == 'v') + verbose = atoi(arg); + else + { + fprintf (stderr, "%s: unknown option\n", prog); + exit (1); + } + } + if (!name) + { + fprintf (stderr, "%s: no dictionary file given\n", prog); + exit (1); + } + dict = dict_open (name, cache, rw); + if (!dict) + { + fprintf (stderr, "%s: dict_open fail\n", prog); + exit (1); + } + if (inputfile) + { + FILE *ipf; + char ipf_buf[256]; + char word[256]; + int i, line = 1; + + if (!(ipf = fopen(inputfile, "r"))) + { + fprintf (stderr, "%s: cannot open %s\n", prog, inputfile); + exit (1); + } + + while (fgets (ipf_buf, 255, ipf)) + { + for (i=0; i<255; i++) + if (ipf_buf[i] > ' ') + word[i] = ipf_buf[i]; + else + break; + word[i] = 0; + if (i) + dict_insert (dict, word, &line); + ++line; + } + fclose (ipf); + } + dict_close (dict); + return 0; +} diff --git a/dict/insert.c b/dict/insert.c new file mode 100644 index 0000000..94fe205 --- /dev/null +++ b/dict/insert.c @@ -0,0 +1,142 @@ +/* + * Copyright (C) 1994, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: insert.c,v $ + * Revision 1.1 1994-08-16 16:26:48 adam + * Added dict. + * + */ + +#include +#include +#include +#include + +#include + +static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp) +{ + void *p; + Dict_ptr ptr = dict->head.free_list; + if (dict->head.free_list == dict->head.last) + { + dict->head.free_list++; + dict->head.last = dict->head.free_list; + bf_newp (dict->bf, ptr, &p); + } + else + { + bf_readp (dict->bf, dict->head.free_list, &p); + dict->head.free_list = DICT_nextptr(p); + if (dict->head.free_list == 0) + dict->head.free_list = dict->head.last; + } + assert (p); + DICT_type(p) = 1; + DICT_backptr(p) = back_ptr; + DICT_nextptr(p) = 0; + DICT_nodir(p) = 0; + DICT_size(p) = 0; + *pp = p; + return ptr; +} + +static int dict_ins (Dict dict, const Dict_char *str, Dict_ptr back_ptr, + void *p, void *userinfo) +{ + Dict_ptr ptr = back_ptr, subptr; + short *indxp, *indxp1, *indxp2; + short newsize; + if (ptr == 0) + ptr = new_page (dict, back_ptr, &p); + assert (p); + assert (ptr); + + indxp = (short*) ((char*) p+DICT_PAGESIZE); + while (*str != DICT_EOS) + { + char *info; + if (*--indxp > 0) /* tail string here! */ + { + int cmp; + info = DICT_info(p) + *indxp; + cmp = dict_strcmp ((Dict_char*) + (info+sizeof(Dict_info)+sizeof(Dict_ptr)), + str); + if (!cmp) + { + if (memcmp (info+sizeof(Dict_ptr), userinfo, sizeof(userinfo))) + { + memcpy (info+sizeof(Dict_ptr), userinfo, sizeof(userinfo)); + bf_touch (dict->bf, ptr); + } + return 0; + } + else if(cmp < 0) + break; + + } + else if(*indxp < 0) /* tail of string in sub page */ + { + int cmp; + info = DICT_info(p) - *indxp; + cmp = memcmp (info+sizeof(Dict_info)+sizeof(Dict_ptr), str, + sizeof(Dict_char)); + if (!cmp) + { + Dict_ptr subptr; + void *pp; + memcpy (&subptr, info, sizeof(subptr)); + if (subptr == 0) + { + subptr = new_page (dict, ptr, &pp); + memcpy (info, &subptr, sizeof(subptr)); + bf_touch (dict->bf, ptr); + } + return dict_ins (dict, str+1, ptr, pp, userinfo); + } + else if(cmp < 0) + break; + } + else + break; + } + newsize = DICT_size(p); + subptr = 0; + memcpy (DICT_info(p) + newsize, &subptr, sizeof(subptr)); + memcpy (DICT_info(p) + newsize + sizeof(Dict_ptr), userinfo, + sizeof(Dict_info)); + memcpy (DICT_info(p) + newsize + sizeof(Dict_ptr)+sizeof(Dict_info), + str, dict_strlen (str)); + newsize = DICT_size(p) + + sizeof(Dict_info) + sizeof(Dict_ptr) + dict_strlen (str); + DICT_size (p) = newsize; + + DICT_nodir(p) = DICT_nodir(p)+1; + indxp2 = (short*)((char*) p + DICT_PAGESIZE - DICT_nodir(p)*sizeof(short)); + for (indxp1 = indxp2; indxp1 != indxp; indxp1++) + indxp[0] = indxp[1]; + *indxp = -newsize; + return 0; +} + +int dict_insert (Dict dict, const Dict_char *str, void *userinfo) +{ + dict_ins (dict, str, 0, NULL, userinfo); + return 0; +} + + + + + + + + + + + + + diff --git a/dict/lookup.c b/dict/lookup.c new file mode 100644 index 0000000..0cb2260 --- /dev/null +++ b/dict/lookup.c @@ -0,0 +1,24 @@ +/* + * Copyright (C) 1994, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: lookup.c,v $ + * Revision 1.1 1994-08-16 16:26:48 adam + * Added dict. + * + */ + +#include +#include +#include +#include + +#include + +int dict_lookup (Dict dict, Dict_char *p) +{ + return 0; +} + + diff --git a/dict/open.c b/dict/open.c new file mode 100644 index 0000000..fda7f4f --- /dev/null +++ b/dict/open.c @@ -0,0 +1,82 @@ +/* + * Copyright (C) 1994, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: open.c,v $ + * Revision 1.1 1994-08-16 16:26:49 adam + * Added dict. + * + */ + + +#include +#include +#include + +#include + +Dict dict_open (const char *name, int cache, int rw) +{ + Dict dict; + void *head_buf; + struct Dict_head *dh; + + dict = xmalloc (sizeof(*dict)); + + if (rw) + dict->bf = bf_open_w (name, DICT_PAGESIZE, cache); + else + dict->bf = bf_open (name, DICT_PAGESIZE, cache); + + if(!dict->bf) + { + free (dict); + return NULL; + } + if (bf_read (dict->bf, 0, &head_buf) <= 0) + { + if (rw) + { /* create header with information (page 0) */ + bf_newp (dict->bf, 0, &head_buf); + dh = (struct Dict_head *) head_buf; + strcpy(dh->magic_str, DICT_MAGIC); + dh->free_list = dh->last = 1; + dh->page_size = DICT_PAGESIZE; + memcpy (&dict->head, dh, sizeof(*dh)); + } + else + { /* no header present, i.e. no dictionary at all */ + dict->head.free_list = dict->head.last = 0; + dict->head.page_size = DICT_PAGESIZE; + } + } + else /* header was there, check magic and page size */ + { + dh = (struct Dict_head *) head_buf; + if (!strcmp (dh->magic_str, DICT_MAGIC)) + { + bf_close (dict->bf); + free (dict); + return NULL; + } + if (dh->page_size != DICT_PAGESIZE) + { + bf_close (dict->bf); + free (dict); + return NULL; + } + memcpy (&dict->head, dh, sizeof(*dh)); + } + return dict; +} + +int dict_strcmp (const Dict_char *s1, const Dict_char *s2) +{ + return strcmp (s1, s2); +} + +int dict_strlen (const Dict_char *s) +{ + return strlen(s)+1; +} diff --git a/include/dict.h b/include/dict.h new file mode 100644 index 0000000..42431a0 --- /dev/null +++ b/include/dict.h @@ -0,0 +1,67 @@ +/* + * Copyright (C) 1994, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: dict.h,v $ + * Revision 1.1 1994-08-16 16:26:53 adam + * Added dict. + * + */ + +#ifndef DICT_H +#define DICT_H + +#include + +typedef unsigned Dict_ptr; +typedef char Dict_char; + +struct Dict_head { + char magic_str[8]; + int page_size; + Dict_ptr free_list, last; +}; + +typedef struct Dict_struct { + BFile bf; + struct Dict_head head; +} *Dict; + +#define DICT_MAGIC "dict00" + +typedef int Dict_info; + +#define DICT_PAGESIZE 8192 + +Dict dict_open (const char *name, int cache, int rw); +int dict_close (Dict dict); +int dict_insert (Dict dict, const Dict_char *p, void *userinfo); +int dict_lookup (Dict dict, Dict_char *p); +int dict_strcmp (const Dict_char *s1, const Dict_char *s2); +int dict_strlen (const Dict_char *s); + +#define DICT_EOS 0 +#define DICT_type(x) 0[(Dict_ptr*) x] +#define DICT_backptr(x) 1[(Dict_ptr*) x] +#define DICT_nextptr(x) 2[(Dict_ptr*) x] +#define DICT_nodir(x) 0[(short*)((char*)(x)+3*sizeof(Dict_ptr))] +#define DICT_size(x) 1[(short*)((char*)(x)+3*sizeof(Dict_ptr))] +#define DICT_info(x) ((char*)(x)+3*sizeof(Dict_ptr)+2*sizeof(short)) + +#define DICT_to_str(x) sizeof(Dict_info)+sizeof(Dict_ptr) + + +/* + type type of page + backptr pointer to parent + nextptr pointer to next page (if any) + nodir no of words + size size of strings,info,ptr entries + + dir[0..nodir-1] + ptr,info,string + */ + + +#endif -- 1.7.10.4