X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=recctrl%2Frectext.c;h=bcc9d26dd873d1109042b1bbbc0fd028cd067927;hp=321e41428af0117fe4738696031d7d922bfd7742;hb=ef696645cc3b7e0f4027008d1dc589c0f0f90c1f;hpb=1300c402a9ecc32b470a7db96c0d93172ed1cf25 diff --git a/recctrl/rectext.c b/recctrl/rectext.c index 321e414..bcc9d26 100644 --- a/recctrl/rectext.c +++ b/recctrl/rectext.c @@ -1,10 +1,47 @@ /* - * Copyright (C) 1994-1995, Index Data I/S + * Copyright (C) 1994-1998, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: rectext.c,v $ - * Revision 1.1 1996-10-11 10:57:28 adam + * Revision 1.12 1999-05-26 07:49:14 adam + * C++ compilation. + * + * Revision 1.11 1999/05/21 12:00:17 adam + * Better diagnostics for extraction process. + * + * Revision 1.10 1999/05/20 12:57:18 adam + * Implemented TCL filter. Updated recctrl system. + * + * Revision 1.9 1998/10/16 08:14:38 adam + * Updated record control system. + * + * Revision 1.8 1998/05/20 10:12:27 adam + * Implemented automatic EXPLAIN database maintenance. + * Modified Zebra to work with ASN.1 compiled version of YAZ. + * + * Revision 1.7 1998/03/11 11:19:05 adam + * Changed the way sequence numbers are generated. + * + * Revision 1.6 1998/02/10 12:03:06 adam + * Implemented Sort. + * + * Revision 1.5 1997/10/27 14:33:06 adam + * Moved towards generic character mapping depending on "structure" + * field in abstract syntax file. Fixed a few memory leaks. Fixed + * bug with negative integers when doing searches with relational + * operators. + * + * Revision 1.4 1996/11/04 14:09:16 adam + * Minor changes. + * + * Revision 1.3 1996/11/01 09:00:33 adam + * This simple "text" format now supports element specs B and M. + * + * Revision 1.2 1996/10/29 14:02:45 adam + * Uses buffered read to speed up things. + * + * Revision 1.1 1996/10/11 10:57:28 adam * New module recctrl. Used to manage records (extract/retrieval). * * Revision 1.7 1996/01/17 14:57:55 adam @@ -36,48 +73,99 @@ #include #include -#include +#include #include "rectext.h" -static void text_init (void) +static void *text_init (RecType recType) +{ + return 0; +} + +static void text_destroy (void *clientData) +{ +} + +struct buf_info { + struct recExtractCtrl *p; + char *buf; + int offset; + int max; +}; + +struct buf_info *buf_open (struct recExtractCtrl *p) { + struct buf_info *fi = (struct buf_info *) xmalloc (sizeof(*fi)); + + fi->p = p; + fi->buf = (char *) xmalloc (4096); + fi->offset = 1; + fi->max = 1; + return fi; +} + +int buf_read (struct buf_info *fi, char *dst) +{ + if (fi->offset >= fi->max) + { + if (fi->max <= 0) + return 0; + fi->max = (*fi->p->readf)(fi->p->fh, fi->buf, 4096); + fi->offset = 0; + if (fi->max <= 0) + return 0; + } + *dst = fi->buf[(fi->offset)++]; + return 1; } -static int text_extract (struct recExtractCtrl *p) +void buf_close (struct buf_info *fi) { - char w[256]; + xfree (fi->buf); + xfree (fi); +} + +static int text_extract (void *clientData, struct recExtractCtrl *p) +{ + char w[512]; RecWord recWord; - int r, seqno = 1; + int r; + struct buf_info *fi = buf_open (p); - (*p->init)(&recWord); - recWord.which = Word_String; + (*p->init)(p, &recWord); + recWord.reg_type = 'w'; do { int i = 0; - - r = (*p->readf)(p->fh, w, 1); - while (r > 0 && i < 255 && isalnum(w[i])) + + r = buf_read (fi, w); + while (r > 0 && i < 511 && w[i] != '\n' && w[i] != '\r') { i++; - r = (*p->readf) (p->fh, w + i, 1); + r = buf_read (fi, w + i); } if (i) { - w[i] = 0; - recWord.seqno = seqno++; - recWord.u.string = w; - (*p->add)(&recWord); + recWord.string = w; + recWord.length = i; + (*p->addWord)(&recWord); } } while (r > 0); - return 0; + buf_close (fi); + return RECCTRL_EXTRACT_OK; } -static int text_retrieve (struct recRetrieveCtrl *p) +static int text_retrieve (void *clientData, struct recRetrieveCtrl *p) { int r, text_ptr = 0; static char *text_buf = NULL; static int text_size = 0; int start_flag = 1; + const char *elementSetName = NULL; + int no_lines = 0; + + if (p->comp && p->comp->which == Z_RecordComp_simple && + p->comp->u.simple->which == Z_ElementSetNames_generic) + elementSetName = p->comp->u.simple->u.generic; while (1) { @@ -86,7 +174,7 @@ static int text_retrieve (struct recRetrieveCtrl *p) char *nb; text_size = 2*text_size + 8192; - nb = xmalloc (text_size); + nb = (char *) xmalloc (text_size); if (text_buf) { memcpy (nb, text_buf, text_ptr); @@ -110,6 +198,27 @@ static int text_retrieve (struct recRetrieveCtrl *p) break; text_ptr += r; } + text_buf[text_ptr] = '\0'; + if (elementSetName) + { + if (!strcmp (elementSetName, "B")) + no_lines = 4; + if (!strcmp (elementSetName, "M")) + no_lines = 20; + } + if (no_lines) + { + char *p = text_buf; + int i = 0; + + while (++i <= no_lines && (p = strchr (p, '\n'))) + p++; + if (p) + { + p[1] = '\0'; + text_ptr = p-text_buf; + } + } p->output_format = VAL_SUTRS; p->rec_buf = text_buf; p->rec_len = text_ptr; @@ -119,6 +228,7 @@ static int text_retrieve (struct recRetrieveCtrl *p) static struct recType text_type = { "text", text_init, + text_destroy, text_extract, text_retrieve };