X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Frectext.c;h=c6668164dc8fa7ec154147c0639f5d1f4c1f51cf;hb=0e56fa84bb4a5985c19a21926e86862c08d84689;hp=0dba6455a67568022ea0190f718b1a93705e5414;hpb=94ad502634fd97a08c4af0731eb3eddca13a3691;p=idzebra-moved-to-github.git diff --git a/recctrl/rectext.c b/recctrl/rectext.c index 0dba645..c666816 100644 --- a/recctrl/rectext.c +++ b/recctrl/rectext.c @@ -1,49 +1,54 @@ -/* - * Copyright (C) 1994-1995, Index Data I/S - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: rectext.c,v $ - * Revision 1.2 1996-10-29 14:02:45 adam - * Uses buffered read to speed up things. - * - * Revision 1.1 1996/10/11 10:57:28 adam - * New module recctrl. Used to manage records (extract/retrieval). - * - * Revision 1.7 1996/01/17 14:57:55 adam - * Prototype changed for reader functions in extract/retrieve. File - * is identified by 'void *' instead of 'int. - * - * Revision 1.6 1995/10/10 13:59:24 adam - * Function rset_open changed its wflag parameter to general flags. - * - * Revision 1.5 1995/10/02 16:24:39 adam - * Use attribute actually used in search requests. - * - * Revision 1.4 1995/10/02 15:42:55 adam - * Extract uses file descriptors instead of FILE pointers. - * - * Revision 1.3 1995/09/28 09:19:45 adam - * xfree/xmalloc used everywhere. - * Extract/retrieve method seems to work for text records. - * - * Revision 1.2 1995/09/15 14:45:21 adam - * Retrieve control. - * Work on truncation. - * - * Revision 1.1 1995/09/14 07:48:25 adam - * Record control management. - * - */ +/* $Id: rectext.c,v 1.22 2004-11-19 10:27:13 heikki Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 + Index Data Aps + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ + + #include #include #include #include -#include "rectext.h" +#include + +struct text_info { + char *sep; +}; -static void text_init (void) +static void *text_init (Res res, RecType recType) { + struct text_info *tinfo = (struct text_info *) xmalloc(sizeof(*tinfo)); + tinfo->sep = 0; + return tinfo; +} + +static void text_config(void *clientData, Res res, const char *args) +{ + +} + +static void text_destroy (void *clientData) +{ + struct text_info *tinfo = clientData; + xfree (tinfo->sep); + xfree (tinfo); } struct buf_info { @@ -55,27 +60,33 @@ struct buf_info { struct buf_info *buf_open (struct recExtractCtrl *p) { - struct buf_info *fi = xmalloc (sizeof(*fi)); + struct buf_info *fi = (struct buf_info *) xmalloc (sizeof(*fi)); fi->p = p; - fi->buf = xmalloc (4096); + fi->buf = (char *) xmalloc (4096); fi->offset = 1; fi->max = 1; return fi; } -int buf_read (struct buf_info *fi, char *dst) +int buf_read (struct text_info *tinfo, struct buf_info *fi, char *dst) { - if (fi->max <= 0) - return 0; if (fi->offset >= fi->max) { + if (fi->max <= 0) + return 0; fi->max = (*fi->p->readf)(fi->p->fh, fi->buf, 4096); fi->offset = 0; if (fi->max <= 0) return 0; } *dst = fi->buf[(fi->offset)++]; + if (tinfo->sep && *dst == *tinfo->sep) + { + off_t off = (*fi->p->tellf)(fi->p->fh); + (*fi->p->endf)(fi->p->fh, off - (fi->max - fi->offset)); + return 0; + } return 1; } @@ -85,47 +96,72 @@ void buf_close (struct buf_info *fi) xfree (fi); } -static int text_extract (struct recExtractCtrl *p) +static int text_extract (void *clientData, struct recExtractCtrl *p) { - char w[256]; + struct text_info *tinfo = clientData; + char w[512]; RecWord recWord; - int r, seqno = 1; + int r; struct buf_info *fi = buf_open (p); - (*p->init)(&recWord); - recWord.which = Word_String; +#if 0 + yaz_log(YLOG_LOG, "text_extract off=%ld", + (long) (*fi->p->tellf)(fi->p->fh)); +#endif + xfree(tinfo->sep); + tinfo->sep = 0; + (*p->init)(p, &recWord); + recWord.reg_type = 'w'; do { int i = 0; - r = buf_read (fi, w); - while (r > 0 && i < 255 && isalnum(w[i])) + r = buf_read (tinfo, fi, w); + while (r > 0 && i < 511 && w[i] != '\n' && w[i] != '\r') { i++; - r = buf_read (fi, w + i); - } + r = buf_read (tinfo, fi, w + i); + } if (i) { - int j; - for (j = 0; jadd)(&recWord); + recWord.string = w; + recWord.length = i; + (*p->tokenAdd)(&recWord); } } while (r > 0); buf_close (fi); - return 0; + return RECCTRL_EXTRACT_OK; } -static int text_retrieve (struct recRetrieveCtrl *p) +static int text_retrieve (void *clientData, struct recRetrieveCtrl *p) { int r, text_ptr = 0; static char *text_buf = NULL; static int text_size = 0; - int start_flag = 1; + int make_header = 1; + int make_body = 1; + const char *elementSetName = NULL; + int no_lines = 0; + + if (p->comp && p->comp->which == Z_RecordComp_simple && + p->comp->u.simple->which == Z_ElementSetNames_generic) + elementSetName = p->comp->u.simple->u.generic; + if (elementSetName) + { + /* don't make header for the R(aw) element set name */ + if (!strcmp(elementSetName, "R")) + { + make_header = 0; + make_body = 1; + } + /* only make header for the H(eader) element set name */ + else if (!strcmp(elementSetName, "H")) + { + make_header = 1; + make_body = 0; + } + } while (1) { if (text_ptr + 4096 >= text_size) @@ -133,7 +169,7 @@ static int text_retrieve (struct recRetrieveCtrl *p) char *nb; text_size = 2*text_size + 8192; - nb = xmalloc (text_size); + nb = (char *) xmalloc (text_size); if (text_buf) { memcpy (nb, text_buf, text_ptr); @@ -141,22 +177,51 @@ static int text_retrieve (struct recRetrieveCtrl *p) } text_buf = nb; } - if (start_flag) + if (make_header && text_ptr == 0) { - start_flag = 0; if (p->score >= 0) { sprintf (text_buf, "Rank: %d\n", p->score); text_ptr = strlen(text_buf); } - sprintf (text_buf + text_ptr, "Local Number: %d\n", p->localno); + sprintf (text_buf + text_ptr, "Local Number: " ZINT_FORMAT "\n", + p->localno); text_ptr = strlen(text_buf); + if (p->fname) + { + sprintf (text_buf + text_ptr, "Filename: %s\n", p->fname); + text_ptr = strlen(text_buf); + } + strcpy(text_buf+text_ptr++, "\n"); } + if (!make_body) + break; r = (*p->readf)(p->fh, text_buf + text_ptr, 4096); if (r <= 0) break; text_ptr += r; } + text_buf[text_ptr] = '\0'; + if (elementSetName) + { + if (!strcmp (elementSetName, "B")) + no_lines = 4; + if (!strcmp (elementSetName, "M")) + no_lines = 20; + } + if (no_lines) + { + char *p = text_buf; + int i = 0; + + while (++i <= no_lines && (p = strchr (p, '\n'))) + p++; + if (p) + { + p[1] = '\0'; + text_ptr = p-text_buf; + } + } p->output_format = VAL_SUTRS; p->rec_buf = text_buf; p->rec_len = text_ptr; @@ -166,8 +231,20 @@ static int text_retrieve (struct recRetrieveCtrl *p) static struct recType text_type = { "text", text_init, + text_config, + text_destroy, text_extract, text_retrieve }; -RecType recTypeText = &text_type; +RecType +#ifdef IDZEBRA_STATIC_TEXT +idzebra_filter_text +#else +idzebra_filter +#endif + +[] = { + &text_type, + 0, +};