X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=c2a664b6c3686e8f699b3c7e03ada38c3721f9fd;hb=e5e87e3b665f390409902a8efd716cdec1b6a2a6;hp=09902fdbfdb3a386a209a8ff5e9e780c22b08d49;hpb=2a6f64379961f30bfe6376964ce279ab5242da1f;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 09902fd..c2a664b 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.147 2004-01-22 11:27:21 adam Exp $ +/* $Id: extract.c,v 1.155 2004-06-03 11:45:28 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -23,6 +23,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include +#include #ifdef WIN32 #include #else @@ -423,7 +424,7 @@ static int file_extract_record(ZebraHandle zh, { RecordAttr *recordAttr; int r; - char *matchStr; + const char *matchStr; SYSNO sysnotmp; Record rec; off_t recordOffset = 0; @@ -755,6 +756,7 @@ int fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname, char ext[128]; char ext_res[128]; struct file_read_info *fi; + const char *original_record_type = 0; if (!zh->m_group || !*zh->m_group) *gprefix = '\0'; @@ -774,6 +776,7 @@ int fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname, break; } /* determine file type - depending on extension */ + original_record_type = zh->m_record_type; if (!zh->m_record_type) { sprintf (ext_res, "%srecordType.%s", gprefix, ext); @@ -811,6 +814,7 @@ int fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname, if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1) { logf (LOG_WARN|LOG_ERRNO, "open %s", full_rep); + zh->m_record_type = original_record_type; return 0; } } @@ -823,6 +827,7 @@ int fileExtract (ZebraHandle zh, SYSNO *sysno, const char *fname, file_read_stop (fi); if (fd != -1) close (fd); + zh->m_record_type = original_record_type; return r; } @@ -846,7 +851,7 @@ int buffer_extract_record (ZebraHandle zh, RecordAttr *recordAttr; struct recExtractCtrl extractCtrl; int i, r; - char *matchStr = 0; + const char *matchStr = 0; RecType recType = NULL; char subType[1024]; void *clientData; @@ -953,20 +958,22 @@ int buffer_extract_record (ZebraHandle zh, if (! *sysno) { char *rinfo; if (match_criteria && *match_criteria) { - matchStr = (char *)match_criteria; + matchStr = match_criteria; } else { if (zh->m_record_id && *zh->m_record_id) { matchStr = fileMatchStr (zh, &zh->reg->keys, pr_fname, zh->m_record_id); + if (!matchStr) + { + logf (LOG_WARN, "Bad match criteria (recordID)"); + return 1; + } } } if (matchStr) { rinfo = dict_lookup (zh->reg->matchDict, matchStr); if (rinfo) memcpy (sysno, rinfo+1, sizeof(*sysno)); - } else { - logf (LOG_WARN, "Bad match criteria (recordID)"); - return 0; } } @@ -1280,8 +1287,10 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, #endif if (zh->reg->key_buf_used + 1024 > (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*)) - extract_flushWriteKeys (zh); + extract_flushWriteKeys (zh,0); + assert(zh->reg->ptr_i >= 0); ++(zh->reg->ptr_i); + assert(zh->reg->ptr_i > 0); (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] = (char*)zh->reg->key_buf + zh->reg->key_buf_used; #if SU_SCHEME @@ -1317,24 +1326,66 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, assert (off == reckeys->buf_used); } -void extract_flushWriteKeys (ZebraHandle zh) +void extract_flushWriteKeys (ZebraHandle zh, int final) + /* optimizing: if final=1, and no files written yet */ + /* push the keys directly to merge, sidestepping the */ + /* temp file altogether. Speeds small updates */ { FILE *outf; char out_fname[200]; char *prevcp, *cp; struct encode_info encode_info; int ptr_i = zh->reg->ptr_i; + int temp_policy; #if SORT_EXTRA int i; #endif if (!zh->reg->key_buf || ptr_i <= 0) + { + logf (LOG_DEBUG, " nothing to flush section=%d buf=%p i=%d", + zh->reg->key_file_no, zh->reg->key_buf, ptr_i); + logf (LOG_DEBUG, " buf=%p ", + zh->reg->key_buf); + logf (LOG_DEBUG, " ptr=%d ",zh->reg->ptr_i); + logf (LOG_DEBUG, " reg=%p ",zh->reg); + return; + } (zh->reg->key_file_no)++; logf (LOG_LOG, "sorting section %d", (zh->reg->key_file_no)); + logf (LOG_DEBUG, " sort_buff at %p n=%d", + zh->reg->key_buf + zh->reg->ptr_top - ptr_i,ptr_i); #if !SORT_EXTRA qsort (zh->reg->key_buf + zh->reg->ptr_top - ptr_i, ptr_i, sizeof(char*), key_qsort_compare); + + /* zebra.cfg: tempfiles: + Y: always use temp files (old way) + A: use temp files, if more than one (auto) + = if this is both the last and the first + N: never bother with temp files (new) */ + + temp_policy=toupper(res_get_def(zh->res,"tempfiles","auto")[0]); + if (temp_policy != 'Y' && temp_policy != 'N' && temp_policy != 'A') { + logf (LOG_WARN, "Illegal tempfiles setting '%c'. using 'Auto' ", + temp_policy); + temp_policy='A'; + } + + if ( ( temp_policy =='N' ) || /* always from memory */ + ( ( temp_policy =='A' ) && /* automatic */ + (zh->reg->key_file_no == 1) && /* this is first time */ + (final) ) ) /* and last (=only) time */ + { /* go directly from memory */ + zh->reg->key_file_no =0; /* signal not to read files */ + zebra_index_merge(zh); + zh->reg->ptr_i = 0; + zh->reg->key_buf_used = 0; + return; + } + + /* Not doing directly from memory, write into a temp file */ extract_get_fname_tmp (zh, out_fname, zh->reg->key_file_no); if (!(outf = fopen (out_fname, "wb"))) @@ -1434,6 +1485,10 @@ void extract_add_index_string (RecWord *p, const char *string, } dst = keys->buf + keys->buf_used; + /* leader byte is encoded as follows: + bit 0 : 1 if attrset is unchanged; 0 if attrset is changed + bit 1 : 1 if attruse is unchanged; 0 if attruse is changed + */ attrSet = p->attrSet; if (keys->buf_used > 0 && keys->prevAttrSet == attrSet) lead |= 1;