X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=a980bcf26b0fede3d9ba9f7da960bb4100fd773c;hb=9ceac3e96fe083b93b6fd4a676003dd10482e465;hp=86f868b3a9e7c355a1b6c92e29ed1bf4ee6448d4;hpb=f7a3769dede0071696bdcc13ae2ee1efe6d52d96;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 86f868b..a980bcf 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.214 2006-05-18 12:03:05 adam Exp $ +/* $Id: extract.c,v 1.218 2006-05-30 13:44:44 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -48,6 +48,25 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #endif +#define ENCODE_BUFLEN 768 +struct encode_info { +#if 0 + int sysno; /* previously written values for delta-compress */ + int seqno; + int cmd; + int prevsys; /* buffer for skipping insert/delete pairs */ + int prevseq; + int prevcmd; + int keylen; /* tells if we have an unwritten key in buf, and how long*/ +#endif + void *encode_handle; + void *decode_handle; + char buf[ENCODE_BUFLEN]; +}; + +static void encode_key_init (struct encode_info *i); +static void encode_key_write (char *k, struct encode_info *i, FILE *outf); +static void encode_key_flush (struct encode_info *i, FILE *outf); #define USE_SHELLSORT 0 @@ -96,12 +115,7 @@ static void extract_init (struct recExtractCtrl *p, RecWord *w) { w->zebra_maps = p->zebra_maps; w->seqno = 1; -#if NATTR -#else - w->attrSet = VAL_BIB1; - w->attrUse = 1016; -#endif - w->index_name = 0; + w->index_name = "any"; w->index_type = 'w'; w->extractCtrl = p; w->record_id = 0; @@ -110,17 +124,22 @@ static void extract_init (struct recExtractCtrl *p, RecWord *w) static void searchRecordKey(ZebraHandle zh, zebra_rec_keys_t reckeys, - int attrSetS, int attrUseS, + const char *index_name, const char **ws, int ws_length) { int i; - int ch; + int ch = -1; for (i = 0; ireg->zei, - attrSetS, attrUseS); + if (ch < 0) + ch = zebraExplain_lookup_attr_str(zh->reg->zei, '0', index_name); + if (ch < 0) + ch = zebraExplain_lookup_attr_str(zh->reg->zei, 'p', index_name); + if (ch < 0) + ch = zebraExplain_lookup_attr_str(zh->reg->zei, 'w', index_name); + if (ch < 0) return ; @@ -236,9 +255,7 @@ static char *fileMatchStr (ZebraHandle zh, { const char *ws[32]; char attset_str[64], attname_str[64]; - data1_attset *attset; int i; - int attSet = 1, attUse = 1; int first = 1; for (s++; strchr(FILE_MATCH_BLANK, *s); s++) @@ -251,7 +268,9 @@ static char *fileMatchStr (ZebraHandle zh, for (; strchr(FILE_MATCH_BLANK, *s); s++) ; - if (*s == ',') + if (*s != ',') + strcpy(attname_str, attset_str); + else { for (s++; strchr(FILE_MATCH_BLANK, *s); s++) ; @@ -261,18 +280,8 @@ static char *fileMatchStr (ZebraHandle zh, attname_str[i++] = *s; attname_str[i] = '\0'; } - - if ((attset = data1_get_attset (zh->reg->dh, attset_str))) - { - data1_att *att; - attSet = attset->reference; - att = data1_getattbyname(zh->reg->dh, attset, attname_str); - if (att) - attUse = att->value; - else - attUse = atoi (attname_str); - } - searchRecordKey (zh, reckeys, attSet, attUse, ws, 32); + + searchRecordKey (zh, reckeys, attname_str, ws, 32); if (*s != ')') { @@ -1571,8 +1580,7 @@ ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, ord = key.mem[0]; zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, - 0/* db */, 0/* set */, 0/* use */, - 0 /* string_index */); + 0/* db */, 0 /* string_index */); assert(index_type); zebra_term_untrans_iconv(zh, nmem, index_type, &dst_term, str); @@ -1601,7 +1609,7 @@ void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys) assert(key.len <= 4 && key.len > 2); zebraExplain_lookup_ord(zh->reg->zei, - key.mem[0], &index_type, &db, 0, 0, 0); + key.mem[0], &index_type, &db, 0); seqno = (int) key.mem[key.len-1]; @@ -1621,24 +1629,13 @@ void extract_add_index_string(RecWord *p, const char *str, int length) ZebraExplainInfo zei = zh->reg->zei; int ch; - if (p->index_name) - { - ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name); - if (ch < 0) - ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name); - } - else - { -#if NATTR - return; -#else - ch = zebraExplain_lookup_attr_su(zei, p->index_type, - p->attrSet, p->attrUse); - if (ch < 0) - ch = zebraExplain_add_attr_su(zei, p->index_type, - p->attrSet, p->attrUse); -#endif - } + if (!p->index_name) + return; + + ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name); + if (ch < 0) + ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name); + key.len = 4; key.mem[0] = ch; key.mem[1] = p->record_id; @@ -1673,24 +1670,12 @@ static void extract_add_sort_string(RecWord *p, const char *str, int length) ZebraExplainInfo zei = zh->reg->zei; int ch; - if (p->index_name) - { - ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name); - if (ch < 0) - ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name); - } - else - { -#if NATTR - return; -#else - ch = zebraExplain_lookup_attr_su(zei, p->index_type, - VAL_IDXPATH, p->attrUse); - if (ch < 0) - ch = zebraExplain_add_attr_su(zei, p->index_type, - VAL_IDXPATH, p->attrUse); -#endif - } + if (!p->index_name) + return; + + ch = zebraExplain_lookup_attr_str(zei, p->index_type, p->index_name); + if (ch < 0) + ch = zebraExplain_add_attr_str(zei, p->index_type, p->index_name); key.len = 4; key.mem[0] = ch; key.mem[1] = p->record_id; @@ -1915,24 +1900,10 @@ void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, void encode_key_init (struct encode_info *i) { - i->sysno = 0; - i->seqno = 0; - i->cmd = -1; - i->prevsys=0; - i->prevseq=0; - i->prevcmd=-1; - i->keylen=0; i->encode_handle = iscz1_start(); i->decode_handle = iscz1_start(); } -#define OLDENCODE 1 - -#ifdef OLDENCODE -/* this is the old encode_key_write - * may be deleted once we are confident that the new works - * HL 15-oct-2002 - */ void encode_key_write (char *k, struct encode_info *i, FILE *outf) { struct it_key key; @@ -1983,107 +1954,6 @@ void encode_key_flush (struct encode_info *i, FILE *outf) iscz1_stop(i->decode_handle); } -#else - -/* new encode_key_write - * The idea is to buffer one more key, and compare them - * If we are going to delete and insert the same key, - * we may as well not bother. Should make a difference in - * updates with small modifications (appending to a mbox) - */ -void encode_key_write (char *k, struct encode_info *i, FILE *outf) -{ - struct it_key key; - char *bp; - - if (*k) /* first time for new key */ - { - bp = i->buf; - while ((*bp++ = *k++)) - ; - i->keylen= bp - i->buf -1; - assert(i->keylen+1+sizeof(struct it_key) < ENCODE_BUFLEN); - } - else - { - bp=i->buf + i->keylen; - *bp++=0; - k++; - } - - memcpy (&key, k+1, sizeof(struct it_key)); - if (0==i->prevsys) /* no previous filter, fill up */ - { - i->prevsys=key.sysno; - i->prevseq=key.seqno; - i->prevcmd=*k; - } - else if ( (i->prevsys==key.sysno) && - (i->prevseq==key.seqno) && - (i->prevcmd!=*k) ) - { /* same numbers, diff cmd, they cancel out */ - i->prevsys=0; - } - else - { /* different stuff, write previous, move buf */ - bp = encode_key_int ( (i->prevsys - i->sysno) * 2 + i->prevcmd, bp); - if (i->sysno != i->prevsys) - { - i->sysno = i->prevsys; - i->seqno = 0; - } - else if (!i->seqno && !i->prevseq && i->cmd == i->prevcmd) - { - return; /* ??? Filters some sort of duplicates away */ - /* ??? Can this ever happen -H 15oct02 */ - } - bp = encode_key_int (i->prevseq - i->seqno, bp); - i->seqno = i->prevseq; - i->cmd = i->prevcmd; - if (fwrite (i->buf, bp - i->buf, 1, outf) != 1) - { - yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite"); - exit (1); - } - i->keylen=0; /* ok, it's written, forget it */ - i->prevsys=key.sysno; - i->prevseq=key.seqno; - i->prevcmd=*k; - } -} - -void encode_key_flush (struct encode_info *i, FILE *outf) -{ /* flush the last key from i */ - char *bp =i->buf + i->keylen; - if (0==i->prevsys) - { - return; /* nothing to flush */ - } - *bp++=0; - bp = encode_key_int ( (i->prevsys - i->sysno) * 2 + i->prevcmd, bp); - if (i->sysno != i->prevsys) - { - i->sysno = i->prevsys; - i->seqno = 0; - } - else if (!i->seqno && !i->prevseq && i->cmd == i->prevcmd) - { - return; /* ??? Filters some sort of duplicates away */ - /* ??? Can this ever happen -H 15oct02 */ - } - bp = encode_key_int (i->prevseq - i->seqno, bp); - i->seqno = i->prevseq; - i->cmd = i->prevcmd; - if (fwrite (i->buf, bp - i->buf, 1, outf) != 1) - { - yaz_log (YLOG_FATAL|YLOG_ERRNO, "fwrite"); - exit (1); - } - i->keylen=0; /* ok, it's written, forget it */ - i->prevsys=0; /* forget the values too */ - i->prevseq=0; -} -#endif /* * Local variables: * c-basic-offset: 4