X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=edf6e372cd846e30de182d305f3d43cdafc52b7b;hb=52551bf774d771a0422a401946fd0c5ee3788f34;hp=ee771ac6587a8f0d91f68177d3cffbacf58ec1ea;hpb=00d69171180b9edc123986794b572be57caa5bb2;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index ee771ac..edf6e37 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,5 +1,5 @@ -/* $Id: extract.c,v 1.133 2002-12-16 22:59:34 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 +/* $Id: extract.c,v 1.143 2003-03-13 04:25:17 pop Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data Aps This file is part of the Zebra server. @@ -440,8 +440,6 @@ static int recordExtract (ZebraHandle zh, zh->reg->keys.prevAttrSet = -1; zh->reg->keys.prevSeqNo = 0; zh->reg->sortKeys.buf_used = 0; - zh->reg->sortKeys.buf_max = 0; - zh->reg->sortKeys.buf = 0; recordOffset = fi->file_moffset; extractCtrl.offset = fi->file_moffset; @@ -916,9 +914,11 @@ int extract_rec_in_mem (ZebraHandle zh, const char *recordType, &rGroup, delete_flag, test_mode, + recordType, sysno, match_criteria, - "")); + "", + 0,1)); } /* If sysno is provided, then it's used to identify the reocord. @@ -931,19 +931,21 @@ int bufferExtractRecord (ZebraHandle zh, struct recordGroup *rGroup, int delete_flag, int test_mode, + const char *recordType, int *sysno, const char *match_criteria, - const char *fname) + const char *fname, + int force_update, + int allow_update) { RecordAttr *recordAttr; struct recExtractCtrl extractCtrl; int i, r; char *matchStr = 0; - RecType recType; + RecType recType = NULL; char subType[1024]; void *clientData; - SYSNO sysnotmp; Record rec; long recordOffset = 0; struct zebra_fetch_control fc; @@ -962,33 +964,45 @@ int bufferExtractRecord (ZebraHandle zh, extractCtrl.endf = zebra_record_int_end; extractCtrl.fh = &fc; + zh->reg->keys.buf_used = 0; + zh->reg->keys.prevAttrUse = -1; + zh->reg->keys.prevAttrSet = -1; + zh->reg->keys.prevSeqNo = 0; + zh->reg->sortKeys.buf_used = 0; + /* announce database */ + + if (!(rGroup->databaseName)) { + logf (LOG_WARN, "Invalid record group, no database name given"); + return 0; + } + if (zebraExplain_curDatabase (zh->reg->zei, rGroup->databaseName)) { if (zebraExplain_newDatabase (zh->reg->zei, rGroup->databaseName, 0)) - return 0; + return 0; } - - if (!(rGroup->recordType)) { - logf (LOG_WARN, "No such record type defined"); - return 0; + + if (*recordType) { + logf (LOG_DEBUG, "Record type explicitly specified: %s", recordType); + recType = recType_byName (zh->reg->recTypes, recordType, subType, + &clientData); + } else { + if (!(rGroup->recordType)) { + logf (LOG_WARN, "No such record type defined"); + return 0; + } + logf (LOG_DEBUG, "Get record type from rgroup: %s",rGroup->recordType); + recType = recType_byName (zh->reg->recTypes, rGroup->recordType, subType, + &clientData); + recordType = rGroup->recordType; } - - if (!(recType = - recType_byName (zh->reg->recTypes, rGroup->recordType, subType, - &clientData))) - { + + if (!recType) { logf (LOG_WARN, "No such record type: %s", rGroup->recordType); return 0; } - zh->reg->keys.buf_used = 0; - zh->reg->keys.prevAttrUse = -1; - zh->reg->keys.prevAttrSet = -1; - zh->reg->keys.prevSeqNo = 0; - zh->reg->sortKeys.buf_used = 0; - zh->reg->sortKeys.buf_max = 0; - zh->reg->sortKeys.buf = 0; - + extractCtrl.subType = subType; extractCtrl.init = extract_init; extractCtrl.tokenAdd = extract_token_add; @@ -1034,25 +1048,24 @@ int bufferExtractRecord (ZebraHandle zh, /* match criteria */ matchStr = NULL; - if (! *sysno) { - char *rinfo; - if (strlen(match_criteria) > 0) { - matchStr = (char *)match_criteria; - } else { - if (rGroup->recordId && *rGroup->recordId) { - matchStr = fileMatchStr (zh, &zh->reg->keys, rGroup, fname, - rGroup->recordId); - } - } - if (matchStr) { - rinfo = dict_lookup (zh->reg->matchDict, matchStr); - if (rinfo) - memcpy (sysno, rinfo+1, sizeof(*sysno)); - } else { - logf (LOG_WARN, "Bad match criteria (recordID)"); - return 0; - } - + if (! *sysno && match_criteria) { + char *rinfo; + if (*match_criteria) { + matchStr = (char *)match_criteria; + } else { + if (rGroup->recordId && *rGroup->recordId) { + matchStr = fileMatchStr (zh, &zh->reg->keys, rGroup, fname, + rGroup->recordId); + } + } + if (matchStr) { + rinfo = dict_lookup (zh->reg->matchDict, matchStr); + if (rinfo) + memcpy (sysno, rinfo+1, sizeof(*sysno)); + } else { + logf (LOG_WARN, "Bad match criteria (recordID)"); + return 0; + } } if (! *sysno) @@ -1060,12 +1073,12 @@ int bufferExtractRecord (ZebraHandle zh, /* new record */ if (delete_flag) { - logf (LOG_LOG, "delete %s %s %ld", rGroup->recordType, + logf (LOG_LOG, "delete %s %s %ld", recordType, fname, (long) recordOffset); logf (LOG_WARN, "cannot delete record above (seems new)"); return 1; } - logf (LOG_LOG, "add %s %s %ld", rGroup->recordType, fname, + logf (LOG_LOG, "add %s %s %ld", recordType, fname, (long) recordOffset); rec = rec_new (zh->reg->records); @@ -1082,28 +1095,38 @@ int bufferExtractRecord (ZebraHandle zh, extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys); zh->records_inserted++; - } + } else { /* record already exists */ struct recKeys delkeys; struct sortKeys sortKeys; + if (!allow_update) { + logf (LOG_LOG, "skipped %s %s %ld", + recordType, fname, (long) recordOffset); + logRecord(zh); + return -1; + } + rec = rec_get (zh->reg->records, *sysno); assert (rec); recordAttr = rec_init_attr (zh->reg->zei, rec); - if (recordAttr->runNumber == - zebraExplain_runNumberIncrement (zh->reg->zei, 0)) - { - logf (LOG_LOG, "skipped %s %s %ld", rGroup->recordType, - fname, (long) recordOffset); - extract_flushSortKeys (zh, *sysno, -1, &zh->reg->sortKeys); - rec_rm (&rec); - logRecord(zh); - return 1; + if (!force_update) { + if (recordAttr->runNumber == + zebraExplain_runNumberIncrement (zh->reg->zei, 0)) + { + logf (LOG_LOG, "skipped %s %s %ld", recordType, + fname, (long) recordOffset); + extract_flushSortKeys (zh, *sysno, -1, &zh->reg->sortKeys); + rec_rm (&rec); + logRecord(zh); + return 1; + } } + delkeys.buf_used = rec->size[recInfo_delKeys]; delkeys.buf = rec->info[recInfo_delKeys]; @@ -1117,13 +1140,13 @@ int bufferExtractRecord (ZebraHandle zh, /* record going to be deleted */ if (!delkeys.buf_used) { - logf (LOG_LOG, "delete %s %s %ld", rGroup->recordType, + logf (LOG_LOG, "delete %s %s %ld", recordType, fname, (long) recordOffset); logf (LOG_WARN, "cannot delete file above, storeKeys false"); } else { - logf (LOG_LOG, "delete %s %s %ld", rGroup->recordType, + logf (LOG_LOG, "delete %s %s %ld", recordType, fname, (long) recordOffset); zh->records_deleted++; if (matchStr) @@ -1139,13 +1162,13 @@ int bufferExtractRecord (ZebraHandle zh, /* record going to be updated */ if (!delkeys.buf_used) { - logf (LOG_LOG, "update %s %s %ld", rGroup->recordType, + logf (LOG_LOG, "update %s %s %ld", recordType, fname, (long) recordOffset); logf (LOG_WARN, "cannot update file above, storeKeys false"); } else { - logf (LOG_LOG, "update %s %s %ld", rGroup->recordType, + logf (LOG_LOG, "update %s %s %ld", recordType, fname, (long) recordOffset); extract_flushSortKeys (zh, *sysno, 1, &zh->reg->sortKeys); extract_flushRecordKeys (zh, *sysno, 1, &zh->reg->keys); @@ -1156,7 +1179,7 @@ int bufferExtractRecord (ZebraHandle zh, /* update file type */ xfree (rec->info[recInfo_fileType]); rec->info[recInfo_fileType] = - rec_strdup (rGroup->recordType, &rec->size[recInfo_fileType]); + rec_strdup (recordType, &rec->size[recInfo_fileType]); /* update filename */ xfree (rec->info[recInfo_filename]); @@ -1591,7 +1614,7 @@ static void extract_add_sort_string (RecWord *p, const char *string, { ZebraHandle zh = p->extractCtrl->handle; struct sortKeys *sk = &zh->reg->sortKeys; - size_t off = 0; + int off = 0; while (off < sk->buf_used) { @@ -1727,10 +1750,11 @@ void extract_token_add (RecWord *p) { WRBUF wrbuf; +#if 0 yaz_log (LOG_LOG, "reg_type=%c attrSet=%d attrUse=%d seqno=%d s=%.*s", p->reg_type, p->attrSet, p->attrUse, p->seqno, p->length, p->string); - +#endif if ((wrbuf = zebra_replace(p->zebra_maps, p->reg_type, 0, p->string, p->length))) { @@ -1753,13 +1777,13 @@ void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, int cmd, struct sortKeys *sk) { SortIdx sortIdx = zh->reg->sortIdx; - size_t off = 0; + int off = 0; sortIdx_sysno (sortIdx, sysno); while (off < sk->buf_used) { - int set, use, slen, l; + int set, use, slen; off += key_SU_decode(&set, sk->buf + off); off += key_SU_decode(&use, sk->buf + off);