X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=c6897dc0ee6f0d4a3e819091d640ce51c832dfe8;hb=b8844c65622c12fb105bf7fc6bab2fc237af7c14;hp=479e8bdd6988c2b0ac5a3c04871d963828a49694;hpb=99e4ce02beb2ea748b9970ef35fe8834f04a1528;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 479e8bd..c6897dc 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,10 +1,30 @@ /* - * Copyright (C) 1994-1995, Index Data I/S + * Copyright (C) 1994-1996, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: extract.c,v $ - * Revision 1.54 1996-05-01 13:46:35 adam + * Revision 1.60 1996-06-04 10:18:12 adam + * Search/scan uses character mapping module. + * + * Revision 1.59 1996/05/14 15:47:07 adam + * Cleanup of various buffer size entities. + * + * Revision 1.58 1996/05/14 06:16:38 adam + * Compact use/set bytes used in search service. + * + * Revision 1.57 1996/05/13 14:23:04 adam + * Work on compaction of set/use bytes in dictionary. + * + * Revision 1.56 1996/05/09 09:54:42 adam + * Server supports maps from one logical attributes to a list of physical + * attributes. + * The extraction process doesn't make space consuming 'any' keys. + * + * Revision 1.55 1996/05/09 07:28:55 quinn + * Work towards phrases and multiple registers + * + * Revision 1.54 1996/05/01 13:46:35 adam * First work on multiple records in one file. * New option, -offset, to the "unread" command in the filter module. * @@ -199,13 +219,12 @@ #include #include #include -#include #include #include #include "index.h" -#include "recindex.h" +#include "zinfo.h" static Dict matchDict; @@ -222,6 +241,8 @@ static int records_updated = 0; static int records_deleted = 0; static int records_processed = 0; +static ZebTargetInfo *zti = NULL; + static void logRecord (int showFlag) { if (!showFlag) @@ -252,12 +273,15 @@ void key_open (int mem) } assert (!records); records = rec_open (1); +#if 1 + zti = zebTargetInfo_open (records, 1); +#endif } struct encode_info { int sysno; int seqno; - char buf[512]; + char buf[768]; }; void encode_key_init (struct encode_info *i) @@ -365,6 +389,9 @@ int key_close (void) { key_flush (); xfree (key_buf); +#if 1 + zebTargetInfo_close (zti, 1); +#endif rec_close (&records); dict_close (matchDict); @@ -418,32 +445,35 @@ static void addRecordKey (const RecWord *p) else reckeys.prevAttrUse = attrUse; + *dst++ = lead; + + if (!(lead & 1)) + { + memcpy (dst, &attrSet, sizeof(attrSet)); + dst += sizeof(attrSet); + } + if (!(lead & 2)) + { + memcpy (dst, &attrUse, sizeof(attrUse)); + dst += sizeof(attrUse); + } switch (p->which) { - case Word_String: - *dst++ = lead; - - if (!(lead & 1)) - { - memcpy (dst, &attrSet, sizeof(attrSet)); - dst += sizeof(attrSet); - } - if (!(lead & 2)) - { - memcpy (dst, &attrUse, sizeof(attrUse)); - dst += sizeof(attrUse); - } - for (i = 0; p->u.string[i]; i++) - *dst++ = p->u.string[i]; - *dst++ = '\0'; - - memcpy (dst, &p->seqno, sizeof(p->seqno)); - dst += sizeof(p->seqno); - - break; - default: - return; + case Word_String: + *dst++ = 'w'; + break; + case Word_Phrase: + *dst++ = 'p'; + break; + case Word_Numeric: + *dst++ = 'n'; } + for (i = 0; p->u.string[i]; i++) + *dst++ = p->u.string[i]; + *dst++ = '\0'; + + memcpy (dst, &p->seqno, sizeof(p->seqno)); + dst += sizeof(p->seqno); reckeys.buf_used = dst - reckeys.buf; } @@ -453,6 +483,12 @@ static void flushRecordKeys (SYSNO sysno, int cmd, struct recKeys *reckeys, char attrSet = -1; short attrUse = -1; int off = 0; + + if (zebTargetInfo_curDatabase (zti, databaseName)) + { + if (zebTargetInfo_newDatabase (zti, databaseName)) + abort (); + } while (off < reckeys->buf_used) { const char *src = reckeys->buf + off; @@ -475,10 +511,14 @@ static void flushRecordKeys (SYSNO sysno, int cmd, struct recKeys *reckeys, key_flush (); ++ptr_i; key_buf[ptr_top-ptr_i] = (char*)key_buf + key_buf_used; - key_buf_used += index_word_prefix ((char*)key_buf + key_buf_used, - attrSet, attrUse, databaseName); + + lead = zebTargetInfo_lookupSU (zti, attrSet, attrUse); + if (lead < 0) + lead = zebTargetInfo_addSU (zti, attrSet, attrUse); + assert (lead > 0); + ((char*) key_buf) [key_buf_used++] = lead; while (*src) - ((char*)key_buf) [key_buf_used++] = index_char_cvt (*src++); + ((char*)key_buf) [key_buf_used++] = *src++; src++; ((char*)key_buf) [key_buf_used++] = '\0'; @@ -547,21 +587,6 @@ static const char **searchRecordKey (struct recKeys *reckeys, return ws; } -static void addRecordKeyAny (const RecWord *p) -{ - if (p->attrSet != 1 || p->attrUse != 1016) - { - RecWord w; - - memcpy (&w, p, sizeof(w)); - w.attrSet = 1; - w.attrUse = 1016; - addRecordKey (&w); - } - if (p->attrSet != -1) - addRecordKey (p); -} - struct file_read_info { off_t file_max; off_t file_offset; @@ -813,7 +838,7 @@ static int recordExtract (SYSNO *sysno, const char *fname, extractCtrl.fh = fi; extractCtrl.subType = subType; extractCtrl.init = wordInit; - extractCtrl.add = addRecordKeyAny; + extractCtrl.add = addRecordKey; reckeys.buf_used = 0; reckeys.prevAttrUse = -1; @@ -878,7 +903,7 @@ static int recordExtract (SYSNO *sysno, const char *fname, } logInfo.op = "add"; if (rGroup->fileVerboseFlag) - logf (LOG_LOG, "add %s %s %ld", rGroup->recordType, + logf (LOG_LOG, "add %s %s+%ld", rGroup->recordType, fname, (long) recordOffset); rec = rec_new (records); *sysno = rec->sysno;