X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Frectext.c;h=f102e7cde287c35a11161f4a750345824bdc81b6;hb=ec84578232c3de8ad75cc35638bed1bbe14fb7f7;hp=0dba6455a67568022ea0190f718b1a93705e5414;hpb=94ad502634fd97a08c4af0731eb3eddca13a3691;p=idzebra-moved-to-github.git diff --git a/recctrl/rectext.c b/recctrl/rectext.c index 0dba645..f102e7c 100644 --- a/recctrl/rectext.c +++ b/recctrl/rectext.c @@ -1,10 +1,50 @@ /* - * Copyright (C) 1994-1995, Index Data I/S + * Copyright (C) 1994-2001, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: rectext.c,v $ - * Revision 1.2 1996-10-29 14:02:45 adam + * Revision 1.14 2001-01-22 11:41:41 adam + * Added support for raw retrieval (element set name "R"). + * + * Revision 1.13 1999/09/07 07:19:21 adam + * Work on character mapping. Implemented replace rules. + * + * Revision 1.12 1999/05/26 07:49:14 adam + * C++ compilation. + * + * Revision 1.11 1999/05/21 12:00:17 adam + * Better diagnostics for extraction process. + * + * Revision 1.10 1999/05/20 12:57:18 adam + * Implemented TCL filter. Updated recctrl system. + * + * Revision 1.9 1998/10/16 08:14:38 adam + * Updated record control system. + * + * Revision 1.8 1998/05/20 10:12:27 adam + * Implemented automatic EXPLAIN database maintenance. + * Modified Zebra to work with ASN.1 compiled version of YAZ. + * + * Revision 1.7 1998/03/11 11:19:05 adam + * Changed the way sequence numbers are generated. + * + * Revision 1.6 1998/02/10 12:03:06 adam + * Implemented Sort. + * + * Revision 1.5 1997/10/27 14:33:06 adam + * Moved towards generic character mapping depending on "structure" + * field in abstract syntax file. Fixed a few memory leaks. Fixed + * bug with negative integers when doing searches with relational + * operators. + * + * Revision 1.4 1996/11/04 14:09:16 adam + * Minor changes. + * + * Revision 1.3 1996/11/01 09:00:33 adam + * This simple "text" format now supports element specs B and M. + * + * Revision 1.2 1996/10/29 14:02:45 adam * Uses buffered read to speed up things. * * Revision 1.1 1996/10/11 10:57:28 adam @@ -42,7 +82,12 @@ #include #include "rectext.h" -static void text_init (void) +static void *text_init (RecType recType) +{ + return 0; +} + +static void text_destroy (void *clientData) { } @@ -55,10 +100,10 @@ struct buf_info { struct buf_info *buf_open (struct recExtractCtrl *p) { - struct buf_info *fi = xmalloc (sizeof(*fi)); + struct buf_info *fi = (struct buf_info *) xmalloc (sizeof(*fi)); fi->p = p; - fi->buf = xmalloc (4096); + fi->buf = (char *) xmalloc (4096); fi->offset = 1; fi->max = 1; return fi; @@ -66,10 +111,10 @@ struct buf_info *buf_open (struct recExtractCtrl *p) int buf_read (struct buf_info *fi, char *dst) { - if (fi->max <= 0) - return 0; if (fi->offset >= fi->max) { + if (fi->max <= 0) + return 0; fi->max = (*fi->p->readf)(fi->p->fh, fi->buf, 4096); fi->offset = 0; if (fi->max <= 0) @@ -85,47 +130,52 @@ void buf_close (struct buf_info *fi) xfree (fi); } -static int text_extract (struct recExtractCtrl *p) +static int text_extract (void *clientData, struct recExtractCtrl *p) { - char w[256]; + char w[512]; RecWord recWord; - int r, seqno = 1; + int r; struct buf_info *fi = buf_open (p); - (*p->init)(&recWord); - recWord.which = Word_String; + (*p->init)(p, &recWord); + recWord.reg_type = 'w'; do { int i = 0; r = buf_read (fi, w); - while (r > 0 && i < 255 && isalnum(w[i])) + while (r > 0 && i < 511 && w[i] != '\n' && w[i] != '\r') { i++; r = buf_read (fi, w + i); } if (i) { - int j; - for (j = 0; jadd)(&recWord); + recWord.string = w; + recWord.length = i; + (*p->tokenAdd)(&recWord); } } while (r > 0); buf_close (fi); - return 0; + return RECCTRL_EXTRACT_OK; } -static int text_retrieve (struct recRetrieveCtrl *p) +static int text_retrieve (void *clientData, struct recRetrieveCtrl *p) { int r, text_ptr = 0; static char *text_buf = NULL; static int text_size = 0; int start_flag = 1; + const char *elementSetName = NULL; + int no_lines = 0; + + if (p->comp && p->comp->which == Z_RecordComp_simple && + p->comp->u.simple->which == Z_ElementSetNames_generic) + elementSetName = p->comp->u.simple->u.generic; + /* don't make header for the R(aw) element set name */ + if (elementSetName && !strcmp(elementSetName, "R")) + start_flag = 0; while (1) { if (text_ptr + 4096 >= text_size) @@ -133,7 +183,7 @@ static int text_retrieve (struct recRetrieveCtrl *p) char *nb; text_size = 2*text_size + 8192; - nb = xmalloc (text_size); + nb = (char *) xmalloc (text_size); if (text_buf) { memcpy (nb, text_buf, text_ptr); @@ -157,6 +207,27 @@ static int text_retrieve (struct recRetrieveCtrl *p) break; text_ptr += r; } + text_buf[text_ptr] = '\0'; + if (elementSetName) + { + if (!strcmp (elementSetName, "B")) + no_lines = 4; + if (!strcmp (elementSetName, "M")) + no_lines = 20; + } + if (no_lines) + { + char *p = text_buf; + int i = 0; + + while (++i <= no_lines && (p = strchr (p, '\n'))) + p++; + if (p) + { + p[1] = '\0'; + text_ptr = p-text_buf; + } + } p->output_format = VAL_SUTRS; p->rec_buf = text_buf; p->rec_len = text_ptr; @@ -166,6 +237,7 @@ static int text_retrieve (struct recRetrieveCtrl *p) static struct recType text_type = { "text", text_init, + text_destroy, text_extract, text_retrieve };