X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=dict%2Finsert.c;h=d43572c2d9e08481fc9f99229f3c4fbb052f5a58;hb=275c0e8eec44714b41300f6e743dc2e09f353aec;hp=8f2a7b71311ba8362140d8af0ea32e2258cd9985;hpb=81238bdcd599682ea14080db50622889310017ea;p=idzebra-moved-to-github.git diff --git a/dict/insert.c b/dict/insert.c index 8f2a7b7..d43572c 100644 --- a/dict/insert.c +++ b/dict/insert.c @@ -4,7 +4,22 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: insert.c,v $ - * Revision 1.7 1994-09-12 08:06:42 adam + * Revision 1.12 1995-09-06 10:34:44 adam + * Memcpy in clean_page edited to satisfy checkergcc. + * + * Revision 1.11 1995/09/04 12:33:31 adam + * Various cleanup. YAZ util used instead. + * + * Revision 1.10 1994/10/05 12:16:48 adam + * Pagesize is a resource now. + * + * Revision 1.9 1994/09/16 15:39:13 adam + * Initial code of lookup - not tested yet. + * + * Revision 1.8 1994/09/16 12:35:01 adam + * New version of split_page which use clean_page for splitting. + * + * Revision 1.7 1994/09/12 08:06:42 adam * Futher development of insert.c * * Revision 1.6 1994/09/06 13:05:15 adam @@ -40,6 +55,8 @@ static int dict_ins (Dict dict, const Dict_char *str, Dict_ptr back_ptr, int userlen, void *userinfo); +static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, + Dict_ptr subptr, char *userinfo); static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp) @@ -75,13 +92,14 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) void *subp; char *info_here; Dict_ptr subptr; - int i, need; - short *indxp, *best_indxp; + int i; + short *indxp, *best_indxp = NULL; Dict_char best_char = 0; Dict_char prev_char = 0; int best_no = -1, no_current = 1; - indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short)); + /* determine splitting char... */ + indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); for (i = DICT_nodir (p); --i >= 0; --indxp) { if (*indxp > 0) /* tail string here! */ @@ -119,7 +137,7 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) info_here = NULL; for (indxp=best_indxp, i=0; i 0); @@ -134,118 +152,28 @@ static int split_page (Dict dict, Dict_ptr ptr, void *p) assert (!info_here); info_here = info+(slen+1)*sizeof(Dict_char); } - } - /* calculate the amount of bytes needed for this entry when */ - /* transformed to a sub entry */ - need = sizeof(Dict_char)+sizeof(Dict_ptr)+1; - if (info_here) - need += *info_here; - - indxp = best_indxp; - /* now loop on all entries with string length > 1 i.e. all */ - /* those entries which contribute to a sub page */ - best_indxp = NULL; - for (i=0; i 0); - - info = (char*) p + *indxp; /* entry start */ - assert (*info == best_char); - slen = dict_strlen(info); - - if (slen > 1) + else { info1 = info+(1+slen)*sizeof(Dict_char); /* info start */ - - if (need <= (1+slen)*sizeof(Dict_char) + 1 + *info1) - best_indxp = indxp; /* space for entry */ dict_ins (dict, info+sizeof(Dict_char), subptr, *info1, info1+1); dict_bf_readp (dict->dbf, ptr, &p); } } - if (best_indxp) - { /* there was a hole big enough for a sub entry */ - char *info = (char*) p + *best_indxp; - short *indxp1; - - *--indxp = - *best_indxp; - DICT_type(p) = 1; - DICT_nodir (p) -= (best_no-1); - indxp1 = (short*)((char*)p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short)); - while (indxp != indxp1) - { - --indxp; - *indxp = indxp[1-best_no]; - } - memcpy (info, &subptr, sizeof(Dict_ptr)); /* store subptr */ - info += sizeof(Dict_ptr); - memcpy (info, &best_char, sizeof(Dict_char)); /* store sub char */ - info += sizeof(Dict_char); - if (info_here) - memcpy (info, info_here, *info_here+1); /* with information */ - else - *info = 0; /* without info */ -#if CHECK - best_indxp = NULL; - prev_char = 0; - indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short)); - for (i = DICT_nodir (p); --i >= 0; --indxp) - { - if (*indxp > 0) /* tail string here! */ - { - Dict_char dc; - - memcpy (&dc, (char*) p + *indxp, sizeof(dc)); - assert (dc != best_char); - assert (dc >= prev_char); - prev_char = dc; - } - else - { - Dict_char dc; - memcpy (&dc, (char*)p - *indxp+sizeof(Dict_ptr), - sizeof(dc)); - assert (dc > prev_char); - if (dc == best_char) - { - assert (best_indxp == NULL); - best_indxp = indxp; - } - prev_char = dc; - } - } - assert (best_indxp); -#endif - } - else - { - short *indxp1, *indxp2; - assert (0); - DICT_type(p) = 1; - DICT_nodir(p) -= best_no; - indxp2 = indxp; - indxp1 = (short*)((char*) p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short)); - do - { - --indxp2; - indxp2[0] = indxp2[-best_no]; - } while (indxp2 != indxp1); - } + /* now clean the page ... */ + clean_page (dict, ptr, p, &best_char, subptr, info_here); return 0; } -static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out) +static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out, + Dict_ptr subptr, char *userinfo) { char *np = xmalloc (dict->head.page_size); int i, slen, no = 0; short *indxp1, *indxp2; char *info1, *info2; - indxp1 = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short)); - indxp2 = (short*) ((char*) np+DICT_PAGESIZE); + indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); + indxp2 = (short*) ((char*) np+DICT_pagesize(dict)); info2 = (char*) np + DICT_infoffset; for (i = DICT_nodir (p); --i >= 0; --indxp1) { @@ -257,12 +185,30 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out) info1 = (char*) p + *indxp1; if (out && memcmp (out, info1, sizeof(Dict_char)) == 0) + { + if (subptr == 0) + continue; + *--indxp2 = -(info2 - np); + memcpy (info2, &subptr, sizeof(Dict_ptr)); + info2 += sizeof(Dict_ptr); + memcpy (info2, out, sizeof(Dict_char)); + info2 += sizeof(Dict_char); + if (userinfo) + { + memcpy (info2, userinfo, *userinfo+1); + info2 += *userinfo + 1; + } + else + *info2++ = 0; + subptr = 0; + ++no; continue; + } *--indxp2 = info2 - np; slen = (dict_strlen(info1)+1)*sizeof(Dict_char); memcpy (info2, info1, slen); + info1 += slen; info2 += slen; - info1 += slen; } else { @@ -275,20 +221,30 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out) *--indxp2 = -(info2 - np); info1 = (char*) p - *indxp1; memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char)); + info1 += sizeof(Dict_ptr)+sizeof(Dict_char); info2 += sizeof(Dict_ptr)+sizeof(Dict_char); - info1 += sizeof(Dict_ptr)+sizeof(Dict_char); } slen = *info1+1; memcpy (info2, info1, slen); info2 += slen; ++no; } +#if 1 + memcpy ((char*)p+DICT_infoffset, + (char*)np+DICT_infoffset, + info2 - ((char*)np+DICT_infoffset)); + memcpy ((char*)p + ((char*)indxp2 - (char*)np), + indxp2, + ((char*) np+DICT_pagesize(dict)) - (char*)indxp2); +#else memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset, - DICT_PAGESIZE-DICT_infoffset); + DICT_pagesize(dict)-DICT_infoffset); +#endif DICT_size(p) = info2 - np; DICT_type(p) = 0; DICT_nodir(p) = no; xfree (np); + dict_bf_touch (dict->dbf, ptr); } @@ -300,7 +256,7 @@ static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out) static int dict_ins (Dict dict, const Dict_char *str, Dict_ptr back_ptr, int userlen, void *userinfo) { - int hi, lo, mid, i, slen, cmp = 1; + int hi, lo, mid, slen, cmp = 1; Dict_ptr ptr = back_ptr; short *indxp; char *info; @@ -316,7 +272,7 @@ static int dict_ins (Dict dict, const Dict_char *str, mid = lo = 0; hi = DICT_nodir(p)-1; - indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short)); + indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); while (lo <= hi) { mid = (lo+hi)/2; @@ -395,18 +351,17 @@ static int dict_ins (Dict dict, const Dict_char *str, } if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+ userlen >= - DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short)) + DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short)) { if (DICT_type(p) == 1) { - clean_page (dict, ptr, p, NULL); - dict_bf_touch (dict->dbf, ptr); + clean_page (dict, ptr, p, NULL, 0, NULL); return dict_ins (dict, str-1, ptr, userlen, userinfo); } if (split_page (dict, ptr, p)) { - log (LOG_FATAL, "Unable to split page %d\n", ptr); + logf (LOG_FATAL, "Unable to split page %d\n", ptr); abort (); } return dict_ins (dict, str-1, ptr, userlen, userinfo); @@ -451,42 +406,21 @@ static int dict_ins (Dict dict, const Dict_char *str, --indxp; slen = (dict_strlen(str)+1)*sizeof(Dict_char); if (DICT_size(p)+slen+userlen >= - DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */ + DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */ { - if (DICT_type(p) == 1) - { - clean_page (dict, ptr, p, NULL); - dict_bf_touch (dict->dbf, ptr); - return dict_ins (dict, str, ptr, userlen, userinfo); - } - i = 0; - do - { - assert (i <= 1); - if (split_page (dict, ptr, p)) - { - log (LOG_FATAL, "Unable to split page %d\n", ptr); - abort (); - } - if (DICT_size(p)+slen+userlen < - DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short)) - break; - i++; - clean_page (dict, ptr, p, NULL); - } while (DICT_size(p)+slen+userlen > DICT_PAGESIZE - - (1+DICT_nodir(p))*sizeof(short)); + split_page (dict, ptr, p); return dict_ins (dict, str, ptr, userlen, userinfo); } if (cmp) { short *indxp1; (DICT_nodir(p))++; - indxp1 = (short*)((char*) p + DICT_PAGESIZE + indxp1 = (short*)((char*) p + DICT_pagesize(dict) - DICT_nodir(p)*sizeof(short)); for (; indxp1 != indxp; indxp1++) indxp1[0] = indxp1[1]; #if CHECK - indxp1 = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short)); + indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); for (i = DICT_nodir (p); --i >= 0; --indxp1) { if (*indxp1 < 0)