X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=dict%2Flookupec.c;h=686aa0400d3032f8a8077809cd79cc025242effa;hb=42143dd8393ba4fbad62289dcff47b150c1f4be5;hp=56e53848c37a4d7150c8b01378cab31f78565383;hpb=02ac0a77d27046442a63371dbf37ee5c0c452dee;p=idzebra-moved-to-github.git diff --git a/dict/lookupec.c b/dict/lookupec.c index 56e5384..686aa04 100644 --- a/dict/lookupec.c +++ b/dict/lookupec.c @@ -4,13 +4,30 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: lookupec.c,v $ - * Revision 1.1 1994-09-22 10:43:44 adam + * Revision 1.6 1996-02-02 13:43:51 adam + * The public functions simply use char instead of Dict_char to represent + * search strings. Dict_char is used internally only. + * + * Revision 1.5 1995/01/24 16:01:03 adam + * Added -ansi to CFLAGS. + * Use new API of dfa module. + * + * Revision 1.4 1994/10/05 12:16:51 adam + * Pagesize is a resource now. + * + * Revision 1.3 1994/09/26 16:31:06 adam + * Minor changes. + * + * Revision 1.2 1994/09/22 14:43:57 adam + * First functional version of lookup with error correction. A 'range' + * specified the maximum number of insertions+deletions+substitutions. + * + * Revision 1.1 1994/09/22 10:43:44 adam * Two versions of depend. Type 1 is the tail-type compatible with * all make programs. Type 2 is the GNU make with include facility. * Type 2 is default. depend rule chooses current rule. * */ - #include #include #include @@ -27,87 +44,94 @@ typedef struct { #define SH(x) (((x)<<1)+1) -int dict_look_ec (Dict dict, MatchInfo *mi, MatchWord *ri_base, int pos, - int (*userfunc)(Dict_char *), int range) +int dict_look_ec (Dict dict, Dict_ptr ptr, MatchInfo *mi, MatchWord *ri_base, + int pos, int (*userfunc)(char *), int range, + Dict_char *prefix) { - Dict_ptr ptr = 1; - int mid, lo, hi; + int lo, hi; void *p; short *indxp; char *info; MatchWord match_mask = 1<<(mi->m-1); dict_bf_readp (dict->dbf, ptr, &p); - mid = lo = 0; + lo = 0; hi = DICT_nodir(p)-1; - indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short)); + indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short)); while (lo <= hi) { - mid = lo; - if (indxp[-mid] > 0) + if (indxp[-lo] > 0) { /* string (Dict_char *) DICT_EOS terminated */ /* unsigned char length of information */ /* char * information */ MatchWord *ri = ri_base, sc; int i, j; - info = (char*)p + indxp[-mid]; + info = (char*)p + indxp[-lo]; for (j=0; ; j++) { Dict_char ch; memcpy (&ch, info+j*sizeof(Dict_char), sizeof(Dict_char)); - if (j && (ri[-1] & match_mask)) + prefix[pos+j] = ch; + if (ch == DICT_EOS) { - if (ch == DICT_EOS) - (*userfunc)(info); + if (ri[range] & match_mask) + (*userfunc)((char*) prefix); break; } - if (j > mi->m+range-pos) - break; - if (ch == DICT_EOS) + if (j+pos >= mi->m+range) break; sc = mi->s[ch & 255]; ri[1+range] = SH(ri[0]) & sc; for (i=1; i<=range; i++) - ri[i+1+range] = (SH(ri[i])&sc) | SH(ri[i-1]) + ri[i+1+range] = (SH(ri[i]) & sc) | SH(ri[i-1]) | SH(ri[i+range]) | ri[i-1]; ri += 1+range; + if (!(ri[range] & (1<<(pos+j)))) + break; } } -#if 0 else { - Dict_char dc; - Dict_ptr subptr; + Dict_char ch; + MatchWord *ri = ri_base, sc; + int i; /* Dict_ptr subptr */ /* Dict_char sub char */ /* unsigned char length of information */ /* char * information */ - info = (char*)p - indxp[-mid]; - memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char)); - cmp = dc- *str; - if (!cmp) + info = (char*)p - indxp[-lo]; + memcpy (&ch, info+sizeof(Dict_ptr), sizeof(Dict_char)); + prefix[pos] = ch; + + sc = mi->s[ch & 255]; + ri[1+range] = SH(ri[0]) & sc; + for (i=1; i<=range; i++) + ri[i+1+range] = (SH(ri[i]) & sc) | SH(ri[i-1]) + | SH(ri[i+range]) | ri[i-1]; + ri += 1+range; + if (ri[range] & (1<dbf, ptr, &p); - mid = lo = 0; - hi = DICT_nodir(p)-1; - indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short)); - continue; + indxp = (short*) ((char*) p + + DICT_pagesize(dict)-sizeof(short)); } } - } -#endif lo++; } return 0; @@ -129,26 +153,26 @@ static MatchInfo *prepare_match (Dict_char *pattern) return mi; } -int dict_lookup_ec (Dict dict, Dict_char *pattern, int range, - int (*userfunc)(Dict_char *name)) +int dict_lookup_ec (Dict dict, char *pattern, int range, + int (*userfunc)(char *name)) { MatchInfo *mi; MatchWord *ri; int i; + Dict_char prefix[2048]; if (dict->head.last == 1) return 0; - mi = prepare_match (pattern); - ri = xmalloc ((dict_strlen(pattern)+range+2)*(range+1)*sizeof(*ri)); + mi = prepare_match ((Dict_char*) pattern); + + ri = xmalloc ((dict_strlen((Dict_char*) pattern)+range+2) + * (range+1)*sizeof(*ri)); for (i=0; i<=range; i++) ri[i] = (2<