X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=f0f3d1257a3838ddb079e0d822ed739c853c7d1c;hb=dc017c2fd1686d5a1bb5b04c45f11c69da60421a;hp=32aed3667db1a5ce7c01f792eaee843b3fba5767;hpb=ce3907338568fce46c5751e7e1091a5ad1c8e291;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 32aed36..f0f3d12 100644 --- a/index/extract.c +++ b/index/extract.c @@ -4,7 +4,23 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: extract.c,v $ - * Revision 1.80 1998-03-05 08:45:11 adam + * Revision 1.84 1998-06-11 15:42:22 adam + * Changed the way use attributes are specified in the recordId + * specification. + * + * Revision 1.83 1998/06/08 14:43:10 adam + * Added support for EXPLAIN Proxy servers - added settings databasePath + * and explainDatabase to facilitate this. Increased maximum number + * of databases and attributes in one register. + * + * Revision 1.82 1998/05/20 10:12:15 adam + * Implemented automatic EXPLAIN database maintenance. + * Modified Zebra to work with ASN.1 compiled version of YAZ. + * + * Revision 1.81 1998/03/11 11:19:04 adam + * Changed the way sequence numbers are generated. + * + * Revision 1.80 1998/03/05 08:45:11 adam * New result set model and modular ranking system. Moved towards * descent server API. System information stored as "SGML" records. 
* @@ -336,8 +352,13 @@ static void logRecord (int showFlag) } } -int key_open (BFiles bfs, int mem, int rw, data1_handle dh) +static int explain_extract (void *handle, Record drec, data1_node *n); + +int key_open (struct recordGroup *rGroup, int mem) { + BFiles bfs = rGroup->bfs; + int rw = rGroup->flagRw; + data1_handle dh = rGroup->dh; if (!mem) mem = atoi(res_get_def (common_resource, "memMax", "4"))*1024*1024; if (mem < 50000) @@ -361,7 +382,8 @@ int key_open (BFiles bfs, int mem, int rw, data1_handle dh) dict_close (matchDict); return -1; } - zti = zebraExplain_open (records, dh, rw); + zti = zebraExplain_open (records, dh, common_resource, + rw, rGroup, explain_extract); if (!zti) { rec_close (&records); @@ -540,13 +562,14 @@ void key_flush (void) key_buf_used = 0; } -int key_close (int rw) +int key_close (struct recordGroup *rGroup) { - key_flush (); - xfree (key_buf); + int rw = rGroup->flagRw; if (rw) zebraExplain_runNumberIncrement (zti, 1); - zebraExplain_close (zti, rw); + zebraExplain_close (zti, rw, 0); + key_flush (); + xfree (key_buf); rec_close (&records); dict_close (matchDict); sortIdx_close (sortIdx); @@ -558,7 +581,8 @@ int key_close (int rw) static void wordInit (struct recExtractCtrl *p, RecWord *w) { w->zebra_maps = p->zebra_maps; - w->attrSet = 1; + w->seqnos = p->seqno; + w->attrSet = VAL_BIB1; w->attrUse = 1016; w->reg_type = 'w'; } @@ -587,6 +611,7 @@ static void addIndexString (RecWord *p, const char *string, int length) short attrUse; int lead = 0; int diff = 0; + int *pseqno = &p->seqnos[p->reg_type]; if (reckeys.buf_used+1024 > reckeys.buf_max) { @@ -611,14 +636,14 @@ static void addIndexString (RecWord *p, const char *string, int length) else reckeys.prevAttrUse = attrUse; #if 1 - diff = 1 + p->seqno - reckeys.prevSeqNo; + diff = 1 + *pseqno - reckeys.prevSeqNo; if (diff >= 1 && diff <= 15) lead |= (diff << 2); else diff = 0; #endif - reckeys.prevSeqNo = p->seqno; - + reckeys.prevSeqNo = *pseqno; + *dst++ = lead; if (!(lead & 1)) @@ 
-638,11 +663,11 @@ static void addIndexString (RecWord *p, const char *string, int length) if (!diff) { - memcpy (dst, &p->seqno, sizeof(p->seqno)); - dst += sizeof(p->seqno); + memcpy (dst, pseqno, sizeof(*pseqno)); + dst += sizeof(*pseqno); } reckeys.buf_used = dst - reckeys.buf; - (p->seqno)++; + (*pseqno)++; } static void addSortString (RecWord *p, const char *string, int length) @@ -788,19 +813,13 @@ static void flushSortKeys (SYSNO sysno, int cmd) sortKeys = NULL; } -static void flushRecordKeys (SYSNO sysno, int cmd, struct recKeys *reckeys, - const char *databaseName) +static void flushRecordKeys (SYSNO sysno, int cmd, struct recKeys *reckeys) { char attrSet = -1; short attrUse = -1; int seqno = 0; int off = 0; - if (zebraExplain_curDatabase (zti, databaseName)) - { - if (zebraExplain_newDatabase (zti, databaseName)) - abort (); - } zebraExplain_recordCountIncrement (zti, cmd ? 1 : -1); while (off < reckeys->buf_used) { @@ -829,7 +848,8 @@ static void flushRecordKeys (SYSNO sysno, int cmd, struct recKeys *reckeys, if (ch < 0) ch = zebraExplain_addSU (zti, attrSet, attrUse); assert (ch > 0); - ((char*) key_buf) [key_buf_used++] = ch; + key_buf_used += key_SU_code (ch, ((char*)key_buf) + key_buf_used); + while (*src) ((char*)key_buf) [key_buf_used++] = *src++; src++; @@ -933,7 +953,6 @@ static struct file_read_info *file_read_start (int fd) static void file_read_stop (struct file_read_info *fi) { - assert (fi); xfree (fi); } @@ -984,26 +1003,13 @@ static void file_end (void *handle, off_t offset) p->file_moffset = offset; } -static int atois (const char **s) -{ - int val = 0, c; - while ( (c=**s) >= '0' && c <= '9') - { - val = val*10 + c - '0'; - ++(*s); - } - return val; -} - static char *fileMatchStr (struct recKeys *reckeys, struct recordGroup *rGroup, - const char *fname, - const char *spec) + const char *fname, const char *spec) { static char dstBuf[2048]; char *dst = dstBuf; const char *s = spec; static const char **w; - int i; while (1) { @@ -1013,21 
+1019,39 @@ static char *fileMatchStr (struct recKeys *reckeys, struct recordGroup *rGroup, break; if (*s == '(') { + char attset_str[64], attname_str[64]; + data1_attset *attset; + int i; char matchFlag[32]; - int attrSet, attrUse; + int attSet = 1, attUse = 1; int first = 1; s++; - attrSet = atois (&s); - if (*s != ',') - { - logf (LOG_WARN, "Missing , in match criteria %s in group %s", - spec, rGroup->groupName ? rGroup->groupName : "none"); - return NULL; - } - s++; - attrUse = atois (&s); - w = searchRecordKey (reckeys, attrSet, attrUse); + for (i = 0; *s && *s != ',' && *s != ')'; s++) + if (i < 63) + attset_str[i++] = *s; + attset_str[i] = '\0'; + + if (*s == ',') + { + s++; + for (i = 0; *s && *s != ')'; s++) + if (i < 63) + attname_str[i++] = *s; + attname_str[i] = '\0'; + } + + if ((attset = data1_get_attset (rGroup->dh, attset_str))) + { + data1_att *att; + attSet = attset->reference; + att = data1_getattbyname(rGroup->dh, attset, attname_str); + if (att) + attUse = att->value; + else + attUse = atoi (attname_str); + } + w = searchRecordKey (reckeys, attSet, attUse); assert (w); if (*s == ')') @@ -1057,7 +1081,7 @@ static char *fileMatchStr (struct recKeys *reckeys, struct recordGroup *rGroup, if (first) { logf (LOG_WARN, "Record didn't contain match" - " fields in (%d,%d)", attrSet, attrUse); + " fields in (%s,%s)", attset_str, attname_str); return NULL; } } @@ -1146,42 +1170,53 @@ static void recordLogPreamble (int level, const char *msg, void *info) log_event_start (NULL, NULL); } +void addSchema (struct recExtractCtrl *p, Odr_oid *oid) +{ + zebraExplain_addSchema (zti, oid); +} + static int recordExtract (SYSNO *sysno, const char *fname, struct recordGroup *rGroup, int deleteFlag, - struct file_read_info *fi, RecType recType, - char *subType) + struct file_read_info *fi, + RecType recType, char *subType) { - struct recExtractCtrl extractCtrl; RecordAttr *recordAttr; int r; char *matchStr; SYSNO sysnotmp; - off_t recordOffset = 0; Record rec; struct 
recordLogInfo logInfo; + off_t recordOffset = 0; if (fi->fd != -1) { + struct recExtractCtrl extractCtrl; + /* we are going to read from a file, so prepare the extraction */ - extractCtrl.fh = fi; - extractCtrl.subType = subType; - extractCtrl.init = wordInit; - extractCtrl.add = addRecordKey; - extractCtrl.dh = rGroup->dh; + int i; - reckeys.buf_used = 0; - reckeys.prevAttrUse = -1; - reckeys.prevAttrSet = -1; - reckeys.prevSeqNo = 0; - - recordOffset = fi->file_moffset; - extractCtrl.offset = recordOffset; - extractCtrl.readf = file_read; - extractCtrl.seekf = file_seek; - extractCtrl.tellf = file_tell; - extractCtrl.endf = file_end; + reckeys.buf_used = 0; + reckeys.prevAttrUse = -1; + reckeys.prevAttrSet = -1; + reckeys.prevSeqNo = 0; + + recordOffset = fi->file_moffset; + extractCtrl.offset = fi->file_moffset; + extractCtrl.readf = file_read; + extractCtrl.seekf = file_seek; + extractCtrl.tellf = file_tell; + extractCtrl.endf = file_end; + extractCtrl.fh = fi; + extractCtrl.subType = subType; + extractCtrl.init = wordInit; + extractCtrl.addWord = addRecordKey; + extractCtrl.addSchema = addSchema; + extractCtrl.dh = rGroup->dh; + for (i = 0; i<256; i++) + extractCtrl.seqno[i] = 0; extractCtrl.zebra_maps = rGroup->zebra_maps; - extractCtrl.flagShowRecords = !rGroup->flagRw; + extractCtrl.flagShowRecords = !rGroup->flagRw; + if (!rGroup->flagRw) printf ("File: %s %ld\n", fname, (long) recordOffset); @@ -1267,7 +1302,7 @@ static int recordExtract (SYSNO *sysno, const char *fname, { dict_insert (matchDict, matchStr, sizeof(*sysno), sysno); } - flushRecordKeys (*sysno, 1, &reckeys, rGroup->databaseName); + flushRecordKeys (*sysno, 1, &reckeys); flushSortKeys (*sysno, 1); records_inserted++; @@ -1293,7 +1328,7 @@ static int recordExtract (SYSNO *sysno, const char *fname, delkeys.buf_used = rec->size[recInfo_delKeys]; delkeys.buf = rec->info[recInfo_delKeys]; flushSortKeys (*sysno, 0); - flushRecordKeys (*sysno, 0, &delkeys, rec->info[recInfo_databaseName]); + 
flushRecordKeys (*sysno, 0, &delkeys); if (deleteFlag) { /* record going to be deleted */ @@ -1331,7 +1366,7 @@ static int recordExtract (SYSNO *sysno, const char *fname, if (records_processed < rGroup->fileVerboseLimit) logf (LOG_LOG, "update %s %s %ld", rGroup->recordType, fname, (long) recordOffset); - flushRecordKeys (*sysno, 1, &reckeys, rGroup->databaseName); + flushRecordKeys (*sysno, 1, &reckeys); records_updated++; } } @@ -1440,12 +1475,10 @@ int fileExtract (SYSNO *sysno, const char *fname, logf (LOG_DEBUG, "fileExtract %s", fname); /* determine file extension */ + *ext = '\0'; for (i = strlen(fname); --i >= 0; ) if (fname[i] == '/') - { - strcpy (ext, ""); break; - } else if (fname[i] == '.') { strcpy (ext, fname+i+1); @@ -1458,20 +1491,17 @@ int fileExtract (SYSNO *sysno, const char *fname, if (!(rGroup->recordType = res_get (common_resource, ext_res))) { sprintf (ext_res, "%srecordType", gprefix); - if (!(rGroup->recordType = res_get (common_resource, ext_res))) - { - if (records_processed < rGroup->fileVerboseLimit) - logf (LOG_LOG, "? %s", fname); - return 0; - } + rGroup->recordType = res_get (common_resource, ext_res); } } if (!rGroup->recordType) { if (records_processed < rGroup->fileVerboseLimit) - logf (LOG_LOG, "? record %s", fname); + logf (LOG_LOG, "? 
%s", fname); return 0; } + if (!*rGroup->recordType) + return 0; if (!(recType = recType_byName (rGroup->recordType, subType))) { logf (LOG_WARN, "No such record type: %s", rGroup->recordType); @@ -1498,6 +1528,20 @@ int fileExtract (SYSNO *sysno, const char *fname, if (!rGroup->databaseName) rGroup->databaseName = "Default"; + /* determine if explain database */ + + sprintf (ext_res, "%sexplainDatabase", gprefix); + rGroup->explainDatabase = + atoi (res_get_def (common_resource, ext_res, "0")); + + /* announce database */ + if (zebraExplain_curDatabase (zti, rGroup->databaseName)) + { + if (zebraExplain_newDatabase (zti, rGroup->databaseName, + rGroup->explainDatabase)) + abort (); + } + if (rGroup->flagStoreData == -1) { const char *sval; @@ -1552,3 +1596,53 @@ int fileExtract (SYSNO *sysno, const char *fname, return r; } +static int explain_extract (void *handle, Record rec, data1_node *n) +{ + struct recordGroup *rGroup = (struct recordGroup*) handle; + struct recExtractCtrl extractCtrl; + int i; + + if (zebraExplain_curDatabase (zti, rec->info[recInfo_databaseName])) + { + if (zebraExplain_newDatabase (zti, rec->info[recInfo_databaseName], 0)) + abort (); + } + + reckeys.buf_used = 0; + reckeys.prevAttrUse = -1; + reckeys.prevAttrSet = -1; + reckeys.prevSeqNo = 0; + + extractCtrl.init = wordInit; + extractCtrl.addWord = addRecordKey; + extractCtrl.addSchema = addSchema; + extractCtrl.dh = rGroup->dh; + for (i = 0; i<256; i++) + extractCtrl.seqno[i] = 0; + extractCtrl.zebra_maps = rGroup->zebra_maps; + extractCtrl.flagShowRecords = !rGroup->flagRw; + + grs_extract_tree(&extractCtrl, n); + + logf (LOG_DEBUG, "flush explain record, sysno=%d", rec->sysno); + + if (rec->size[recInfo_delKeys]) + { + struct recKeys delkeys; + + delkeys.buf_used = rec->size[recInfo_delKeys]; + delkeys.buf = rec->info[recInfo_delKeys]; + flushSortKeys (rec->sysno, 0); + flushRecordKeys (rec->sysno, 0, &delkeys); + } + flushRecordKeys (rec->sysno, 1, &reckeys); + flushSortKeys 
(rec->sysno, 1); + + xfree (rec->info[recInfo_delKeys]); + rec->size[recInfo_delKeys] = reckeys.buf_used; + rec->info[recInfo_delKeys] = reckeys.buf; + reckeys.buf = NULL; + reckeys.buf_max = 0; + + return 0; +}