X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=e6d1aac6f326661abdd5ea6b7e08660078bda9bc;hb=959c65fe556f2937a2c0afc7741c473a0fe1a808;hp=a14b7f50f4724fb781a5cfe248b8bf769ec669f0;hpb=d57bb162b65a585e0ed3ad319d9673d66bc6d90c;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index a14b7f5..e6d1aac 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,10 +1,26 @@ -/* - * Copyright (C) 1994-2002, Index Data - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Id: extract.c,v 1.119 2002-05-07 11:05:19 adam Exp $ - */ +/* $Id: extract.c,v 1.124 2002-10-16 09:30:57 heikki Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 + Index Data Aps + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ + + #include #include #ifdef WIN32 @@ -24,6 +40,9 @@ #define PRINTF_OFF_T "%ld" #endif +#define USE_SHELLSORT 0 + +#if USE_SHELLSORT static void shellsort(void *ar, int r, size_t s, int (*cmp)(const void *a, const void *b)) { @@ -46,7 +65,7 @@ static void shellsort(void *ar, int r, size_t s, memcpy (a+s*j, v, s); } } - +#endif static void logRecord (ZebraHandle zh) { @@ -395,19 +414,6 @@ struct recordLogInfo { struct recordGroup *rGroup; }; -static void recordLogPreamble (int level, const char *msg, void *info) -{ - struct recordLogInfo *p = (struct recordLogInfo *) info; - FILE *outf = yaz_log_file (); - - if (level & LOG_LOG) - return ; - fprintf (outf, "File %s, offset %d, type %s\n", - p->rGroup->recordType, p->recordOffset, p->fname); - log_event_start (NULL, NULL); -} - - static int recordExtract (ZebraHandle zh, SYSNO *sysno, const char *fname, struct recordGroup *rGroup, int deleteFlag, @@ -419,7 +425,6 @@ static int recordExtract (ZebraHandle zh, char *matchStr; SYSNO sysnotmp; Record rec; - struct recordLogInfo logInfo; off_t recordOffset = 0; if (fi->fd != -1) @@ -460,19 +465,19 @@ static int recordExtract (ZebraHandle zh, if (!rGroup->flagRw) printf ("File: %s " PRINTF_OFF_T "\n", fname, recordOffset); - - logInfo.fname = fname; - logInfo.recordOffset = recordOffset; - logInfo.rGroup = rGroup; - log_event_start (recordLogPreamble, &logInfo); + if (rGroup->flagRw) + { + char msg[512]; + sprintf (msg, "%s:" PRINTF_OFF_T , fname, recordOffset); + yaz_log_init_prefix2 (msg); + } r = (*recType->extract)(clientData, &extractCtrl); - log_event_start (NULL, NULL); - + yaz_log_init_prefix2 (0); if (r == RECCTRL_EXTRACT_EOF) return 0; - else if (r == RECCTRL_EXTRACT_ERROR) + else if (r == RECCTRL_EXTRACT_ERROR_GENERIC) { /* error occured during extraction ... */ if (rGroup->flagRw && @@ -483,6 +488,18 @@ static int recordExtract (ZebraHandle zh, } return 0; } + else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER) + { + /* error occured during extraction ... */ + if (rGroup->flagRw && + zh->records_processed < rGroup->fileVerboseLimit) + { + logf (LOG_WARN, "no filter for %s %s " + PRINTF_OFF_T, rGroup->recordType, + fname, recordOffset); + } + return 0; + } if (zh->reg->keys.buf_used == 0) { /* the extraction process returned no information - the record @@ -939,19 +956,16 @@ int extract_rec_in_mem (ZebraHandle zh, const char *recordType, if (r == RECCTRL_EXTRACT_EOF) return 0; - else if (r == RECCTRL_EXTRACT_ERROR) + else if (r == RECCTRL_EXTRACT_ERROR_GENERIC) { /* error occured during extraction ... */ -#if 1 - yaz_log (LOG_WARN, "extract error"); -#else - if (rGroup->flagRw && - zh->records_processed < rGroup->fileVerboseLimit) - { - logf (LOG_WARN, "fail %s %s %ld", rGroup->recordType, - fname, (long) recordOffset); - } -#endif + yaz_log (LOG_WARN, "extract error: generic"); + return 0; + } + else if (r == RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER) + { + /* error occured during extraction ... */ + yaz_log (LOG_WARN, "extract error: no such filter"); return 0; } if (zh->reg->keys.buf_used == 0) @@ -1317,6 +1331,7 @@ void extract_flushWriteKeys (ZebraHandle zh) cp = (zh->reg->key_buf)[zh->reg->ptr_top - ptr_i]; if (strcmp (cp, prevcp)) { + encode_key_flush ( &encode_info, outf); encode_key_init (&encode_info); encode_key_write (cp, &encode_info, outf); prevcp = cp; @@ -1324,6 +1339,7 @@ void extract_flushWriteKeys (ZebraHandle zh) else encode_key_write (cp + strlen(cp), &encode_info, outf); } + encode_key_flush ( &encode_info, outf); #else qsort (key_buf + ptr_top-ptr_i, ptr_i, sizeof(char*), key_x_compare); extract_get_fname_tmp (out_fname, key_file_no); @@ -1355,6 +1371,7 @@ void extract_flushWriteKeys (ZebraHandle zh) cp = key_buf[ptr_top-ptr_i]; encode_key_write (cp+key_y_len, &encode_info, outf); } + encode_key_flush ( &encode_info, outf); if (!i) break; prevcp = key_buf[ptr_top-ptr_i]; @@ -1622,6 +1639,10 @@ void encode_key_init (struct encode_info *i) i->sysno = 0; i->seqno = 0; i->cmd = -1; + i->prevsys=0; + i->prevseq=0; + i->prevcmd=-1; + i->keylen=0; } char *encode_key_int (int d, char *bp) @@ -1649,6 +1670,11 @@ char *encode_key_int (int d, char *bp) return bp; } +#ifdef OLDENCODE +/* this is the old encode_key_write + * may be deleted once we are confident that the new works + * HL 15-oct-2002 + */ void encode_key_write (char *k, struct encode_info *i, FILE *outf) { struct it_key key; @@ -1675,3 +1701,126 @@ void encode_key_write (char *k, struct encode_info *i, FILE *outf) } } +void encode_key_flush (struct encode_info *i, FILE *outf) +{ /* dummy routine */ +} + +#else + +/* new encode_key_write + * The idea is to buffer one more key, and compare them + * If we are going to delete and insert the same key, + * we may as well not bother. Should make a difference in + * updates with small modifications (appending to a mbox) + */ +void encode_key_write (char *k, struct encode_info *i, FILE *outf) +{ + struct it_key key; + char *bp; + + if (*k) /* first time for new key */ + { + bp = i->buf; + while ((*bp++ = *k++)) + ; + i->keylen= bp - i->buf -1; + } + else + { + bp=i->buf + i->keylen; + *bp++=0; + k++; + } + + memcpy (&key, k+1, sizeof(struct it_key)); + if (0==i->prevsys) /* no previous filter, fill up */ + { + i->prevsys=key.sysno; + i->prevseq=key.seqno; + i->prevcmd=*k; + } + else if ( (i->prevsys==key.sysno) && + (i->prevseq==key.seqno) && + (i->prevcmd!=*k) ) + { /* same numbers, diff cmd, they cancel out */ + i->prevsys=0; + } + else + { /* different stuff, write previous, move buf */ + bp = encode_key_int ( (i->prevsys - i->sysno) * 2 + i->prevcmd, bp); + if (i->sysno != i->prevsys) + { + i->sysno = i->prevsys; + i->seqno = 0; + } + else if (!i->seqno && !i->prevseq && i->cmd == i->prevcmd) + { + return; /* ??? Filters some sort of duplicates away */ + /* ??? Can this ever happen -H 15oct02 */ + } + bp = encode_key_int (i->prevseq - i->seqno, bp); + i->seqno = i->prevseq; + i->cmd = i->prevcmd; + if (fwrite (i->buf, bp - i->buf, 1, outf) != 1) + { + logf (LOG_FATAL|LOG_ERRNO, "fwrite"); + exit (1); + } + i->keylen=0; /* ok, it's written, forget it */ + i->prevsys=key.sysno; + i->prevseq=key.seqno; + i->prevcmd=*k; + } +#ifdef SKIPTHIS_OLDCODE + bp = encode_key_int ( (key.sysno - i->sysno) * 2 + *k, bp); + if (i->sysno != key.sysno) + { + i->sysno = key.sysno; + i->seqno = 0; + } + else if (!i->seqno && !key.seqno && i->cmd == *k) + return; + bp = encode_key_int (key.seqno - i->seqno, bp); + i->seqno = key.seqno; + i->cmd = *k; + if (fwrite (i->buf, bp - i->buf, 1, outf) != 1) + { + logf (LOG_FATAL|LOG_ERRNO, "fwrite"); + exit (1); + } + i->keylen=0; /* ok, it's written, forget it */ +#endif +} + +void encode_key_flush (struct encode_info *i, FILE *outf) +{ /* flush the last key from i */ + char *bp =i->buf + i->keylen; + if (0==i->prevsys) + { + return; /* nothing to flush */ + } + *bp++=0; + bp = encode_key_int ( (i->prevsys - i->sysno) * 2 + i->prevcmd, bp); + if (i->sysno != i->prevsys) + { + i->sysno = i->prevsys; + i->seqno = 0; + } + else if (!i->seqno && !i->prevseq && i->cmd == i->prevcmd) + { + return; /* ??? Filters some sort of duplicates away */ + /* ??? Can this ever happen -H 15oct02 */ + } + bp = encode_key_int (i->prevseq - i->seqno, bp); + i->seqno = i->prevseq; + i->cmd = i->prevcmd; + if (fwrite (i->buf, bp - i->buf, 1, outf) != 1) + { + logf (LOG_FATAL|LOG_ERRNO, "fwrite"); + exit (1); + } + i->keylen=0; /* ok, it's written, forget it */ + i->prevsys=0; /* forget the values too */ + i->prevseq=0; +} +#endif