-/* $Id: extract.c,v 1.148 2004-01-22 11:50:16 adam Exp $
+/* $Id: extract.c,v 1.154 2004-06-03 11:38:34 adam Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
Index Data Aps
#include <stdio.h>
#include <assert.h>
+#include <ctype.h>
#ifdef WIN32
#include <io.h>
#else
char ext[128];
char ext_res[128];
struct file_read_info *fi;
+ const char *original_record_type = 0;
if (!zh->m_group || !*zh->m_group)
*gprefix = '\0';
break;
}
/* determine file type - depending on extension */
+ yaz_log(LOG_LOG, "recordType 1=%s", zh->m_record_type ?
+ zh->m_record_type : "<none>");
+ original_record_type = zh->m_record_type;
if (!zh->m_record_type)
{
sprintf (ext_res, "%srecordType.%s", gprefix, ext);
zh->m_record_type = res_get (zh->res, ext_res);
}
+ yaz_log(LOG_LOG, "recordType 2=%s", zh->m_record_type ?
+ zh->m_record_type : "<none>");
if (!zh->m_record_type)
{
if (zh->records_processed < zh->m_file_verbose_limit)
if ((fd = open (full_rep, O_BINARY|O_RDONLY)) == -1)
{
logf (LOG_WARN|LOG_ERRNO, "open %s", full_rep);
+ zh->m_record_type = original_record_type;
return 0;
}
}
file_read_stop (fi);
if (fd != -1)
close (fd);
+ zh->m_record_type = original_record_type;
return r;
}
#endif
if (zh->reg->key_buf_used + 1024 >
(zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*))
- extract_flushWriteKeys (zh);
+ extract_flushWriteKeys (zh,0);
+ assert(zh->reg->ptr_i >= 0);
++(zh->reg->ptr_i);
+ assert(zh->reg->ptr_i > 0);
(zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] =
(char*)zh->reg->key_buf + zh->reg->key_buf_used;
#if SU_SCHEME
assert (off == reckeys->buf_used);
}
-void extract_flushWriteKeys (ZebraHandle zh)
+void extract_flushWriteKeys (ZebraHandle zh, int final)
+ /* Optimization: if final=1 and no temp files have been written yet, */
+ /* push the keys directly to the merge step, sidestepping the temp */
+ /* file altogether. This speeds up small updates. */
{
FILE *outf;
char out_fname[200];
char *prevcp, *cp;
struct encode_info encode_info;
int ptr_i = zh->reg->ptr_i;
+ int temp_policy;
#if SORT_EXTRA
int i;
#endif
if (!zh->reg->key_buf || ptr_i <= 0)
+ {
+ logf (LOG_DEBUG, " nothing to flush section=%d buf=%p i=%d",
+ zh->reg->key_file_no, zh->reg->key_buf, ptr_i);
+ logf (LOG_DEBUG, " buf=%p ",
+ zh->reg->key_buf);
+ logf (LOG_DEBUG, " ptr=%d ",zh->reg->ptr_i);
+ logf (LOG_DEBUG, " reg=%p ",zh->reg);
+
return;
+ }
(zh->reg->key_file_no)++;
logf (LOG_LOG, "sorting section %d", (zh->reg->key_file_no));
+ logf (LOG_DEBUG, " sort_buff at %p n=%d",
+ zh->reg->key_buf + zh->reg->ptr_top - ptr_i,ptr_i);
#if !SORT_EXTRA
qsort (zh->reg->key_buf + zh->reg->ptr_top - ptr_i, ptr_i,
sizeof(char*), key_qsort_compare);
+
+ /* zebra.cfg: tempfiles:
+ Y: always use temp files (old way)
+ A: auto - use temp files, except when this flush is both
+ the first and the last (final) one
+ N: never bother with temp files (new) */
+
+ temp_policy=toupper(res_get_def(zh->res,"tempfiles","auto")[0]);
+ if (temp_policy != 'Y' && temp_policy != 'N' && temp_policy != 'A') {
+ logf (LOG_WARN, "Illegal tempfiles setting '%c'. using 'Auto' ",
+ temp_policy);
+ temp_policy='A';
+ }
+
+ if ( ( temp_policy =='N' ) || /* always from memory */
+ ( ( temp_policy =='A' ) && /* automatic */
+ (zh->reg->key_file_no == 1) && /* this is first time */
+ (final) ) ) /* and last (=only) time */
+ { /* go directly from memory */
+ zh->reg->key_file_no =0; /* signal not to read files */
+ zebra_index_merge(zh);
+ zh->reg->ptr_i = 0;
+ zh->reg->key_buf_used = 0;
+ return;
+ }
+
+ /* Not doing directly from memory, write into a temp file */
extract_get_fname_tmp (zh, out_fname, zh->reg->key_file_no);
if (!(outf = fopen (out_fname, "wb")))
}
dst = keys->buf + keys->buf_used;
+ /* leader byte is encoded as follows:
+ bit 0 : 1 if attrset is unchanged; 0 if attrset is changed
+ bit 1 : 1 if attruse is unchanged; 0 if attruse is changed
+ */
attrSet = p->attrSet;
if (keys->buf_used > 0 && keys->prevAttrSet == attrSet)
lead |= 1;