Added dict.
author Adam Dickmeiss <adam@indexdata.dk>
Tue, 16 Aug 1994 16:26:37 +0000 (16:26 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Tue, 16 Aug 1994 16:26:37 +0000 (16:26 +0000)
Makefile
dict/Makefile [new file with mode: 0644]
dict/close.c [new file with mode: 0644]
dict/dicttest.c [new file with mode: 0644]
dict/insert.c [new file with mode: 0644]
dict/lookup.c [new file with mode: 0644]
dict/open.c [new file with mode: 0644]
include/dict.h [new file with mode: 0644]

index 69c3aa8..2285b75 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,9 @@
 # Copyright (C) 1994, Index Data I/S 
 # All rights reserved.
 # Sebastian Hammer, Adam Dickmeiss
-# $Id: Makefile,v 1.3 1994-08-16 16:21:47 adam Exp $
+# $Id: Makefile,v 1.4 1994-08-16 16:26:37 adam Exp $
 
-SUBDIR=util bfile
+SUBDIR=util bfile dict
 
 all:
        for i in $(SUBDIR); do (cd $$i; make); done
diff --git a/dict/Makefile b/dict/Makefile
new file mode 100644 (file)
index 0000000..a4c366a
--- /dev/null
@@ -0,0 +1,36 @@
+# Copyright (C) 1994, Index Data I/S 
+# All rights reserved.
+# Sebastian Hammer, Adam Dickmeiss
+# $Id: Makefile,v 1.1 1994-08-16 16:26:46 adam Exp $
+
+# Shell used by make to run the commands below.
+SHELL=/bin/sh
+# Header search path; it reaches the compiler through DEFS in the .c.o rule.
+INCLUDE=-I../include
+# Test driver.  "all" does not depend on it, so it is only built when
+# requested by name.
+TPROG=dicttest
+CFLAGS=-g -Wall
+DEFS=$(INCLUDE)
+# Archive that collects the objects listed in PO.
+LIB=../lib/dict.a 
+PO = open.o close.o insert.o lookup.o
+# Preprocessor command used by the dep/depend target.
+CPP=cc -E
+
+# Default target: build only the library archive.
+all: $(LIB)
+
+# Link the test driver against this library and the bfile and util archives.
+$(TPROG): $(TPROG).o $(LIB) 
+       $(CC) -o $(TPROG) $(TPROG).o $(LIB) ../lib/bfile.a ../lib/util.a
+
+# Recreate the archive from scratch: remove the old file, add the objects,
+# then run ranlib on the result.
+$(LIB): $(PO)
+       rm -f $(LIB)
+       ar qc $(LIB) $(PO)
+       ranlib $(LIB)
+
+# Suffix rule: compile one .c file into a .o using DEFS and CFLAGS.
+.c.o:
+       $(CC) -c $(DEFS) $(CFLAGS) $<
+
+# Remove objects and archives in this directory, the test driver and
+# core/profiling output.
+# NOTE(review): $(LIB) lives in ../lib and is not matched by *.[oa].
+clean:
+       rm -f *.[oa] $(TPROG) core mon.out gmon.out
+
+# Run the preprocessor with -M over all C files and store the output
+# in .depend.
+dep depend:
+       $(CPP) $(INCLUDE) -M *.c >.depend
+
+# The conditional around this include is commented out, so .depend is
+# included unconditionally.
+# NOTE(review): presumably make fails when .depend does not exist yet
+# (i.e. before the first "make depend") -- confirm.
+#ifeq (.depend,$(wildcard .depend))
+include .depend
+#endif
diff --git a/dict/close.c b/dict/close.c
new file mode 100644 (file)
index 0000000..3c8cddf
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 1994, Index Data I/S 
+ * All rights reserved.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: close.c,v $
+ * Revision 1.1  1994-08-16 16:26:47  adam
+ * Added dict.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include <dict.h>
+
+/*
+ * dict_close: close the block file behind a dictionary and release the
+ * dictionary handle itself.
+ *
+ *   dict   handle to close; must not be NULL (checked with assert).
+ *
+ * Always returns 0.  The handle must not be used after this call.
+ */
+int dict_close (Dict dict)
+{
+    BFile file;
+
+    assert (dict);
+
+    /* Close the underlying file first, then drop the handle memory. */
+    file = dict->bf;
+    bf_close (file);
+    free (dict);
+
+    return 0;
+}
+
diff --git a/dict/dicttest.c b/dict/dicttest.c
new file mode 100644 (file)
index 0000000..86a733d
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 1994, Index Data I/S 
+ * All rights reserved.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: dicttest.c,v $
+ * Revision 1.1  1994-08-16 16:26:47  adam
+ * Added dict.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <dict.h>
+#include <options.h>
+
+char *prog;
+Dict dict;
+
+/*
+ * Dictionary test driver.
+ *
+ * Usage: prog [-v n] [-i f] [-w] [-c n] file
+ *
+ *   file   dictionary file to open (exactly one must be given)
+ *   -c n   cache size passed to dict_open (values below 2 are raised to 2)
+ *   -w     open the dictionary for writing
+ *   -i f   insert the first word of every line of file f; implies -w
+ *   -v n   verbosity level (parsed and stored, not used further here)
+ *
+ * Each inserted word carries its 1-based input line number as user info.
+ * Returns 0 on success; any error terminates the program with exit(1).
+ */
+int main (int argc, char **argv)
+{
+    const char *name = NULL;
+    const char *inputfile = NULL;
+    int rw = 0;
+    int cache = 10;
+    int ret;
+    int verbose = 0;
+    char *arg;
+    
+    prog = argv[0];
+    if (argc < 2)
+    {
+        fprintf (stderr, "usage:\n"
+                         "  %s [-v n] [-i f] [-w] [-c n] file\n", prog);
+        exit (1);
+    }
+    /* As used here, options() yields 0 for a plain argument, the option
+       letter for an option, and -2 once the argument list is exhausted. */
+    while ((ret = options ("v:i:wc:", argv, argc, &arg)) != -2)
+    {
+        if (ret == 0)
+        {
+            if (name)
+            {
+                fprintf (stderr, "%s: too many files specified\n", prog);
+                exit (1);
+            }
+            name = arg;
+        }
+        else if (ret == 'c')
+        {
+            cache = atoi(arg);
+            if (cache<2)
+                cache = 2;
+        }
+        else if (ret == 'w')
+            rw = 1;
+        else if (ret == 'i')
+        {
+            /* inserting requires a writable dictionary */
+            inputfile = arg;
+            rw = 1;
+        }
+        else if (ret == 'v')
+            verbose = atoi(arg);
+        else
+        {
+            fprintf (stderr, "%s: unknown option\n", prog);
+            exit (1);
+        }
+    }
+    if (!name)
+    {
+        fprintf (stderr, "%s: no dictionary file given\n", prog);
+        exit (1);
+    }
+    dict = dict_open (name, cache, rw);
+    if (!dict)
+    {
+        fprintf (stderr, "%s: dict_open fail\n", prog);
+        exit (1);
+    }
+    if (inputfile)
+    {
+        FILE *ipf;
+        char ipf_buf[256];
+        char word[256];
+        int i, line = 1;
+
+        if (!(ipf = fopen(inputfile, "r")))
+        {
+            fprintf (stderr, "%s: cannot open %s\n", prog, inputfile);
+            /* Do not leave the (possibly just created) dictionary open. */
+            dict_close (dict);
+            exit (1);
+        }
+        
+        while (fgets (ipf_buf, sizeof(ipf_buf), ipf))
+        {
+            /* Copy the leading run of characters greater than ' ';
+               the terminating NUL of ipf_buf also ends the run. */
+            for (i=0; i < (int) sizeof(word) - 1; i++)
+                if (ipf_buf[i] > ' ')
+                    word[i] = ipf_buf[i];
+                else
+                    break;
+            word[i] = 0;
+            /* Lines starting with blank/control characters are skipped,
+               but they still count as a line. */
+            if (i)
+                dict_insert (dict, word, &line);
+            ++line;
+        }
+        fclose (ipf);
+    }
+    dict_close (dict);
+    return 0;
+}
diff --git a/dict/insert.c b/dict/insert.c
new file mode 100644 (file)
index 0000000..94fe205
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 1994, Index Data I/S 
+ * All rights reserved.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: insert.c,v $
+ * Revision 1.1  1994-08-16 16:26:48  adam
+ * Added dict.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include <dict.h>
+
+/*
+ * new_page: obtain an empty dictionary page.
+ *
+ *   dict      dictionary whose in-memory header supplies the page number
+ *   back_ptr  page number stored as the new page's parent pointer
+ *   pp        receives the address of the page buffer
+ *
+ * When head.free_list equals head.last, both are advanced by one and the
+ * page is created with bf_newp.  Otherwise the page at head.free_list is
+ * read and head.free_list moves to that page's next pointer (or back to
+ * head.last when the next pointer is 0).  The page header fields are then
+ * reset: type 1, parent back_ptr, no next page, no directory entries,
+ * size 0.
+ *
+ * Returns the page number of the page.
+ */
+static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
+{
+    /* Start from NULL so the asserts below are meaningful if the block
+       file layer leaves the pointer unset. */
+    void *p = NULL;
+    Dict_ptr ptr = dict->head.free_list;
+
+    if (dict->head.free_list == dict->head.last)
+    {
+        dict->head.free_list++;
+        dict->head.last = dict->head.free_list;
+        bf_newp (dict->bf, ptr, &p);
+    }
+    else
+    {
+        bf_readp (dict->bf, dict->head.free_list, &p);
+        /* Check before the page is dereferenced for its next pointer. */
+        assert (p);
+        dict->head.free_list = DICT_nextptr(p);
+        if (dict->head.free_list == 0)
+            dict->head.free_list = dict->head.last;
+    }
+    assert (p);
+    DICT_type(p) = 1;
+    DICT_backptr(p) = back_ptr;
+    DICT_nextptr(p) = 0;
+    DICT_nodir(p) = 0;
+    DICT_size(p) = 0;
+    /* The page was edited in place; flag it the same way dict_ins does
+       after it changes a page.
+       NOTE(review): presumably bf_touch marks the page dirty -- confirm. */
+    bf_touch (dict->bf, ptr);
+    *pp = p;
+    return ptr;
+}
+
+/*
+ * dict_ins: insert str (with its user info) into one dictionary page.
+ *
+ *   dict      dictionary handle
+ *   str       string to insert
+ *   back_ptr  number of the page held in p; 0 requests a fresh page
+ *   p         buffer of page back_ptr (ignored when back_ptr is 0)
+ *   userinfo  points at sizeof(Dict_info) bytes stored with the string
+ *
+ * The page directory is an array of shorts growing downwards from the end
+ * of the page.  A positive slot is the offset of a "ptr,info,string" entry
+ * holding a complete tail string, a negative slot is the negated offset of
+ * an entry whose tail continues in a sub page, and a zero slot ends the
+ * scan.  If the string is already present only its info is refreshed;
+ * otherwise a new entry is appended to the data area and a directory slot
+ * is opened for it.
+ *
+ * Always returns 0.
+ */
+static int dict_ins (Dict dict, const Dict_char *str, Dict_ptr back_ptr,
+                     void *p, void *userinfo)
+{
+    Dict_ptr ptr = back_ptr, subptr;
+    short *indxp, *indxp1, *indxp2;
+    short newsize;
+    char *entry;
+
+    if (ptr == 0)
+        ptr = new_page (dict, back_ptr, &p);
+    assert (p);
+    assert (ptr);
+
+    indxp = (short*) ((char*) p+DICT_PAGESIZE);
+    /* str is not advanced inside the loop, so this condition only decides
+       whether the directory is scanned at all.
+       NOTE(review): for an empty str the scan is skipped and indxp stays
+       one past the page end, which the code below then writes through --
+       confirm how empty strings are meant to be stored. */
+    while (*str != DICT_EOS)
+    {
+        char *info;
+        if (*--indxp > 0) /* tail string here! */
+        {
+            int cmp;
+            info = DICT_info(p) + *indxp;
+            cmp = dict_strcmp ((Dict_char*)
+                              (info+sizeof(Dict_info)+sizeof(Dict_ptr)),
+                               str);
+            if (!cmp)
+            {
+                /* Same string: refresh the stored info if it differs.
+                   The stored info is sizeof(Dict_info) bytes; userinfo is
+                   a pointer, so its own sizeof must not be used here. */
+                if (memcmp (info+sizeof(Dict_ptr), userinfo,
+                            sizeof(Dict_info)))
+                {
+                    memcpy (info+sizeof(Dict_ptr), userinfo,
+                            sizeof(Dict_info));
+                    bf_touch (dict->bf, ptr);
+                }
+                return 0;
+            }
+            else if(cmp < 0)
+                break;
+            
+        }
+        else if(*indxp < 0)  /* tail of string in sub page */
+        {
+            int cmp;
+            info = DICT_info(p) - *indxp;
+            cmp = memcmp (info+sizeof(Dict_info)+sizeof(Dict_ptr), str, 
+                         sizeof(Dict_char));
+            if (!cmp)
+            {
+                Dict_ptr child;
+                void *child_buf = NULL;
+
+                memcpy (&child, info, sizeof(child));
+                if (child == 0)
+                {
+                    /* No sub page yet: create one and record it here. */
+                    child = new_page (dict, ptr, &child_buf);
+                    memcpy (info, &child, sizeof(child));
+                    bf_touch (dict->bf, ptr);
+                }
+                else
+                {
+                    /* Existing sub page: load it; its buffer was passed
+                       on uninitialised before. */
+                    bf_readp (dict->bf, child, &child_buf);
+                }
+                /* Hand down the sub page's own number together with its
+                   buffer, since the callee treats back_ptr as the number
+                   of the page in p. */
+                return dict_ins (dict, str+1, child, child_buf, userinfo);
+            }
+            else if(cmp < 0)
+                break;
+        }
+        else
+            break;
+    }
+
+    /* Append a new "ptr,info,string" entry behind the existing data.
+       NOTE(review): nothing checks that the entry still fits between the
+       data area and the directory. */
+    entry = DICT_info(p) + DICT_size(p);
+    subptr = 0;
+    memcpy (entry, &subptr, sizeof(subptr));
+    memcpy (entry + sizeof(Dict_ptr), userinfo, sizeof(Dict_info));
+    memcpy (entry + sizeof(Dict_ptr)+sizeof(Dict_info),
+            str, dict_strlen (str));
+    newsize = DICT_size(p) +
+        sizeof(Dict_info) + sizeof(Dict_ptr) + dict_strlen (str);
+    DICT_size (p) = newsize;
+
+    /* Grow the directory by one slot and move the slots below the
+       insertion point down by one to open a gap at indxp. */
+    DICT_nodir(p) = DICT_nodir(p)+1;
+    indxp2 = (short*)((char*) p + DICT_PAGESIZE - DICT_nodir(p)*sizeof(short));
+    for (indxp1 = indxp2; indxp1 != indxp; indxp1++)
+        indxp1[0] = indxp1[1];
+    /* NOTE(review): this stores the negated END offset of the new entry,
+       while the scan above reads positive slots as entry offsets and
+       negative slots as sub-page entries -- confirm the intended
+       encoding before relying on it. */
+    *indxp = -newsize;
+    /* The page was modified in place. */
+    bf_touch (dict->bf, ptr);
+    return 0;
+}
+
+/*
+ * dict_insert: public entry point for adding a string to a dictionary.
+ *
+ *   dict      dictionary handle
+ *   str       string to insert
+ *   userinfo  user data handed through to dict_ins
+ *
+ * The insertion is started with page number 0 and no page buffer, which
+ * makes dict_ins request a page from new_page.  Always returns 0.
+ */
+int dict_insert (Dict dict, const Dict_char *str, void *userinfo)
+{
+    Dict_ptr start_page = 0;
+    void *start_buf = NULL;
+
+    (void) dict_ins (dict, str, start_page, start_buf, userinfo);
+    return 0;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dict/lookup.c b/dict/lookup.c
new file mode 100644 (file)
index 0000000..0cb2260
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 1994, Index Data I/S 
+ * All rights reserved.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: lookup.c,v $
+ * Revision 1.1  1994-08-16 16:26:48  adam
+ * Added dict.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include <dict.h>
+
+/*
+ * dict_lookup: placeholder for dictionary lookup.
+ *
+ * Neither argument is examined; the function always returns 0.
+ */
+int dict_lookup (Dict dict, Dict_char *p)
+{
+    (void) dict;
+    (void) p;
+
+    return 0;
+}
+
+
diff --git a/dict/open.c b/dict/open.c
new file mode 100644 (file)
index 0000000..fda7f4f
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 1994, Index Data I/S 
+ * All rights reserved.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: open.c,v $
+ * Revision 1.1  1994-08-16 16:26:49  adam
+ * Added dict.
+ *
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <dict.h>
+
+/*
+ * dict_open: open a dictionary file.
+ *
+ *   name   file name handed to the block file layer
+ *   cache  cache size handed to the block file layer
+ *   rw     non-zero opens the file with bf_open_w, zero with bf_open
+ *
+ * Page 0 holds the dictionary header.  If it cannot be read and rw is set,
+ * a fresh header is created (magic string, page size, free_list = last =
+ * 1).  If it cannot be read and rw is not set, the in-memory header
+ * describes an empty dictionary.  An existing header must carry the
+ * expected magic string and page size.
+ *
+ * Returns the new handle, or NULL when the file cannot be opened or the
+ * header does not match.
+ */
+Dict dict_open (const char *name, int cache, int rw)
+{
+    Dict dict;
+    void *head_buf = NULL;
+    struct Dict_head *dh;
+
+    dict = xmalloc (sizeof(*dict));
+
+    if (rw)
+        dict->bf = bf_open_w (name, DICT_PAGESIZE, cache);
+    else
+        dict->bf = bf_open (name, DICT_PAGESIZE, cache);
+
+    if(!dict->bf)
+    {
+        free (dict);
+        return NULL;
+    }
+    /* NOTE(review): the insert code reads pages with bf_readp using this
+       same argument shape -- confirm bf_read is the intended call here. */
+    if (bf_read (dict->bf, 0, &head_buf) <= 0)
+    {
+        if (rw) 
+        {   /* create header with information (page 0) */
+            bf_newp (dict->bf, 0, &head_buf);
+            dh = (struct Dict_head *) head_buf;
+            strcpy(dh->magic_str, DICT_MAGIC);
+            dh->free_list = dh->last = 1;
+            dh->page_size = DICT_PAGESIZE;
+            memcpy (&dict->head, dh, sizeof(*dh));
+        }
+        else
+        {   /* no header present, i.e. no dictionary at all */
+            /* clear everything so magic_str is not left indeterminate */
+            memset (&dict->head, 0, sizeof(dict->head));
+            dict->head.free_list = dict->head.last = 0;
+            dict->head.page_size = DICT_PAGESIZE;
+        }
+    }
+    else /* header was there, check magic and page size */
+    {
+        dh = (struct Dict_head *) head_buf;
+        /* Reject the file when the magic string DIFFERS (the test used to
+           be inverted) or the page size is not the compiled-in one.  The
+           comparison is bounded by the field size so a header without a
+           terminating NUL cannot be read past. */
+        if (strncmp (dh->magic_str, DICT_MAGIC, sizeof(dh->magic_str)) != 0
+            || dh->page_size != DICT_PAGESIZE)
+        {
+            bf_close (dict->bf);
+            free (dict);
+            return NULL;
+        }
+        memcpy (&dict->head, dh, sizeof(*dh));
+    }
+    return dict;
+}
+
+/*
+ * dict_strcmp: compare two dictionary strings.
+ *
+ * Returns the result of strcmp on s1 and s2: negative, zero or positive
+ * when s1 sorts before, equal to, or after s2.
+ */
+int dict_strcmp (const Dict_char *s1, const Dict_char *s2)
+{
+    int order = strcmp (s1, s2);
+
+    return order;
+}
+
+/*
+ * dict_strlen: storage size of a dictionary string.
+ *
+ * Unlike strlen, the result counts the terminating NUL as well, so an
+ * empty string yields 1.
+ */
+int dict_strlen (const Dict_char *s)
+{
+    size_t chars = strlen (s);
+
+    return chars + 1;
+}
diff --git a/include/dict.h b/include/dict.h
new file mode 100644 (file)
index 0000000..42431a0
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 1994, Index Data I/S 
+ * All rights reserved.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: dict.h,v $
+ * Revision 1.1  1994-08-16 16:26:53  adam
+ * Added dict.
+ *
+ */
+
+#ifndef DICT_H
+#define DICT_H
+
+#include <bfile.h>
+
+/* Page number inside the dictionary's block file. */
+typedef unsigned Dict_ptr;
+/* Character type of the strings stored in the dictionary. */
+typedef char Dict_char;
+
+/* Dictionary header; dict_open reads it from / creates it in page 0. */
+struct Dict_head {
+    char magic_str[8];          /* holds DICT_MAGIC for a valid file */
+    int page_size;              /* must equal DICT_PAGESIZE */
+    /* free_list: next page handed out by the insert code;
+       last: equals free_list when no page is waiting for reuse */
+    Dict_ptr free_list, last;
+};
+
+/* Dictionary handle: the open block file plus an in-memory header copy. */
+typedef struct Dict_struct {
+    BFile bf;
+    struct Dict_head head;
+} *Dict;
+
+/* Magic string identifying a dictionary file. */
+#define DICT_MAGIC "dict00"
+
+/* User data stored with every string (sizeof(Dict_info) bytes per entry). */
+typedef int Dict_info;
+
+/* Size in bytes of one dictionary page; also passed to the bf_open calls. */
+#define DICT_PAGESIZE 8192
+    
+/* Open dictionary file `name`; rw non-zero opens it for writing.
+   Returns NULL on failure. */
+Dict dict_open (const char *name, int cache, int rw);
+/* Close the block file and free the handle.  Returns 0. */
+int dict_close (Dict dict);
+/* Insert string p; userinfo points at the Dict_info stored with it.
+   Returns 0. */
+int dict_insert (Dict dict, const Dict_char *p, void *userinfo);
+/* Look up string p.  The implementation added with this header is a stub
+   that always returns 0. */
+int dict_lookup (Dict dict, Dict_char *p);
+/* Compare two dictionary strings; result as for strcmp. */
+int dict_strcmp (const Dict_char *s1, const Dict_char *s2);
+/* Size of string s including its terminating NUL. */
+int dict_strlen (const Dict_char *s);
+
+/* End-of-string marker for Dict_char strings. */
+#define DICT_EOS        0
+
+/*
+ * Accessors for the fields at the start of a page buffer x.  Each one
+ * expands to an lvalue, so it can be read and assigned.  The argument and
+ * the whole expansion are parenthesised so that any pointer expression
+ * may be passed and the result can be used inside larger expressions.
+ */
+#define DICT_type(x)    (((Dict_ptr*)(x))[0])
+#define DICT_backptr(x) (((Dict_ptr*)(x))[1])
+#define DICT_nextptr(x) (((Dict_ptr*)(x))[2])
+#define DICT_nodir(x)   (((short*)((char*)(x)+3*sizeof(Dict_ptr)))[0])
+#define DICT_size(x)    (((short*)((char*)(x)+3*sizeof(Dict_ptr)))[1])
+/* Start of the entry data area, directly behind the fields above. */
+#define DICT_info(x)    ((char*)(x)+3*sizeof(Dict_ptr)+2*sizeof(short))
+
+/* Byte offset from the start of an entry to its string (the argument is
+   not used). */
+#define DICT_to_str(x)  (sizeof(Dict_info)+sizeof(Dict_ptr))
+
+
+/*
+   type            type of page
+   backptr         pointer to parent
+   nextptr         pointer to next page (if any)
+   nodir           no of words
+   size            size of strings,info,ptr entries
+
+   dir[0..nodir-1]
+   ptr,info,string
+ */
+
+   
+#endif