From 39cb125e7755c6058a93dada8d5172804213cf8e Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 29 Nov 2004 21:55:25 +0000 Subject: [PATCH] Add support for section/chapter indexing. Add safari filter. --- configure.in | 3 +- include/idzebra/api.h | 16 ++- include/idzebra/recctrl.h | 7 +- index/extract.c | 121 ++++++++++++--------- index/index.h | 15 +-- index/rank1.c | 6 +- index/zebraapi.c | 118 +++++--------------- index/zrpn.c | 10 +- index/zsets.c | 79 +++----------- recctrl/Makefile.am | 13 ++- recctrl/recctrl.c | 8 +- recctrl/safari.c | 264 +++++++++++++++++++++++++++++++++++++++++++++ test/api/Makefile.am | 51 +++++---- test/api/safari.cfg | 9 ++ test/api/safari1.c | 62 +++++++++++ test/api/testlib.c | 65 ++++++++--- test/api/testlib.h | 8 +- test/codec/tstcodec.c | 44 +++++++- 18 files changed, 620 insertions(+), 279 deletions(-) create mode 100644 recctrl/safari.c create mode 100644 test/api/safari.cfg create mode 100644 test/api/safari1.c diff --git a/configure.in b/configure.in index 84b5ebb..2c2dae6 100644 --- a/configure.in +++ b/configure.in @@ -1,5 +1,5 @@ dnl Zebra, Index Data Aps, 1995-2004 -dnl $Id: configure.in,v 1.106 2004-11-19 18:36:06 adam Exp $ +dnl $Id: configure.in,v 1.107 2004-11-29 21:55:25 adam Exp $ dnl AC_INIT(include/idzebra/version.h) AM_INIT_AUTOMAKE(idzebra,1.4.0) @@ -253,6 +253,7 @@ ZEBRA_MODULE(grs-sgml,static,[ --enable-mod-grs-sgml Simple SGML/XML filter]) ZEBRA_MODULE(grs-regx,shared,[ --enable-mod-grs-regx REGX/TCL filter]) ZEBRA_MODULE(grs-marc,shared,[ --enable-mod-grs-marc MARC filter]) ZEBRA_MODULE(grs-danbib,shared,[ --enable-mod-grs-danbib DanBib filter (DBC)]) +ZEBRA_MODULE(safari,shared,[ --enable-mod-safari Safari filter (DBC)]) if test "$ac_cv_header_expat_h" = "yes"; then def="shared" else diff --git a/include/idzebra/api.h b/include/idzebra/api.h index 75a5d65..6b53295 100644 --- a/include/idzebra/api.h +++ b/include/idzebra/api.h @@ -1,4 +1,4 @@ -/* $Id: api.h,v 1.2 2004-09-27 10:44:48 adam Exp $ +/* $Id: api.h,v 1.3 2004-11-29 21:55:25 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -265,5 +265,19 @@ const char *zebra_get_resource(ZebraHandle zh, YAZ_EXPORT void zebra_pidfname(ZebraService zs, char *path); +typedef struct { + char *term; + char *db; + zint sysno; + int score; +} ZebraMetaRecord; + +YAZ_EXPORT ZebraMetaRecord *zebra_meta_records_create (ZebraHandle zh, + const char *name, + int num, + zint *positions); + +YAZ_EXPORT void zebra_meta_records_destroy (ZebraHandle zh, + ZebraMetaRecord *records, int num); YAZ_END_CDECL #endif diff --git a/include/idzebra/recctrl.h b/include/idzebra/recctrl.h index de543f4..6f10701 100644 --- a/include/idzebra/recctrl.h +++ b/include/idzebra/recctrl.h @@ -1,4 +1,4 @@ -/* $Id: recctrl.h,v 1.4 2004-11-29 21:45:11 adam Exp $ +/* $Id: recctrl.h,v 1.5 2004-11-29 21:55:25 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -39,7 +39,9 @@ typedef struct { unsigned reg_type; char *string; int length; - int seqno; + zint seqno; + zint record_id; + zint section_id; ZebraMaps zebra_maps; struct recExtractCtrl *extractCtrl; } RecWord; @@ -58,6 +60,7 @@ struct recExtractCtrl { ZebraMaps zebra_maps; int flagShowRecords; int seqno[256]; + char match_criteria[256]; void (*schemaAdd)(struct recExtractCtrl *p, Odr_oid *oid); data1_handle dh; void *handle; diff --git a/index/extract.c b/index/extract.c index 5c17db5..36c3eb9 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.166 2004-11-19 10:26:56 heikki Exp $ +/* $Id: extract.c,v 1.167 2004-11-29 21:55:25 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -87,6 +87,8 @@ static void extract_init (struct recExtractCtrl *p, RecWord *w) w->attrUse = 1016; w->reg_type = 'w'; w->extractCtrl = p; + w->record_id = 0; + w->section_id = 0; } static const char **searchRecordKey (ZebraHandle zh, @@ -113,9 +115,9 @@ static const char **searchRecordKey (ZebraHandle zh, iscz1_decode(decode_handle, &dst, &src); assert(key.len < 4 && key.len > 2); - attrSet = (int) key.mem[0]; - attrUse = (int) key.mem[1]; - seqno = (int) key.mem[2]; + attrSet = (int) key.mem[0] >> 16; + attrUse = (int) key.mem[0] & 65535; + seqno = (int) key.mem[key.len-1]; if (attrUseS == attrUse && attrSetS == attrSet) { @@ -394,7 +396,7 @@ static int file_extract_record(ZebraHandle zh, { RecordAttr *recordAttr; int r; - const char *matchStr; + const char *matchStr = 0; SYSNO sysnotmp; Record rec; off_t recordOffset = 0; @@ -439,6 +441,7 @@ static int file_extract_record(ZebraHandle zh, extractCtrl.tokenAdd = extract_token_add; extractCtrl.schemaAdd = extract_schema_add; extractCtrl.dh = zh->reg->dh; + extractCtrl.match_criteria[0] = '\0'; extractCtrl.handle = zh; for (i = 0; i<256; i++) { @@ -499,36 +502,36 @@ static int file_extract_record(ZebraHandle zh, fname, recordOffset); return 1; } + if (extractCtrl.match_criteria[0]) + matchStr = extractCtrl.match_criteria; } /* perform match if sysno not known and if match criteria is specified */ - - matchStr = NULL; if (!sysno) { sysnotmp = 0; sysno = &sysnotmp; - if (zh->m_record_id && *zh->m_record_id) + + if (matchStr == 0 && zh->m_record_id && *zh->m_record_id) { - char *rinfo; matchStr = fileMatchStr (zh, &zh->reg->keys, fname, zh->m_record_id); - if (matchStr) - { - rinfo = dict_lookup (zh->reg->matchDict, matchStr); - if (rinfo) - { - assert(*rinfo == sizeof(*sysno)); - memcpy (sysno, rinfo+1, sizeof(*sysno)); - } - } - else - { - yaz_log (YLOG_WARN, "Bad match criteria"); - return 0; - } - } + if (!matchStr) + { + yaz_log(YLOG_WARN, "Bad match criteria"); + return 0; + } + } + if (matchStr) + { + char *rinfo = dict_lookup (zh->reg->matchDict, matchStr); + if (rinfo) + { + assert(*rinfo == sizeof(*sysno)); + memcpy (sysno, rinfo+1, sizeof(*sysno)); + } + } } if (! *sysno) @@ -884,6 +887,7 @@ int buffer_extract_record (ZebraHandle zh, extractCtrl.handle = zh; extractCtrl.zebra_maps = zh->reg->zebra_maps; extractCtrl.flagShowRecords = 0; + extractCtrl.match_criteria[0] = '\0'; for (i = 0; i<256; i++) { if (zebra_maps_is_positioned(zh->reg->zebra_maps, i)) @@ -921,6 +925,9 @@ int buffer_extract_record (ZebraHandle zh, /* match criteria */ matchStr = NULL; + if (extractCtrl.match_criteria[0]) + match_criteria = extractCtrl.match_criteria; + if (! *sysno) { char *rinfo; if (match_criteria && *match_criteria) { @@ -1159,6 +1166,7 @@ int explain_extract (void *handle, Record rec, data1_node *n) extractCtrl.seqno[i] = 0; extractCtrl.zebra_maps = zh->reg->zebra_maps; extractCtrl.flagShowRecords = 0; + extractCtrl.match_criteria[0] = '\0'; extractCtrl.handle = handle; if (n) @@ -1230,10 +1238,10 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, int attrSet, attrUse; iscz1_decode(decode_handle, &dst, &src); - assert(key.len < 4 && key.len > 2); + assert(key.len == 4); - attrSet = (int) key.mem[0]; - attrUse = (int) key.mem[1]; /* sequence in mem[2] */ + attrSet = (int) key.mem[0] >> 16; + attrUse = (int) key.mem[0] & 65535; if (zh->reg->key_buf_used + 1024 > (zh->reg->ptr_top -zh->reg->ptr_i)*sizeof(char*)) @@ -1257,10 +1265,14 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = '\0'; ((char*)(zh->reg->key_buf))[(zh->reg->key_buf_used)++] = cmd; - key.len = 2; - key.mem[0] = sysno; - key.mem[1] = key.mem[2]; /* sequence .. */ - + key.len = 3; + if (key.mem[1]) /* filter specify record ID */ + key.mem[0] = key.mem[1]; + else + key.mem[0] = sysno; + key.mem[1] = key.mem[2]; /* section_id */ + key.mem[2] = key.mem[3]; /* sequence .. */ + memcpy ((char*)zh->reg->key_buf + zh->reg->key_buf_used, &key, sizeof(key)); (zh->reg->key_buf_used) += sizeof(key); @@ -1404,13 +1416,13 @@ void extract_flushWriteKeys (ZebraHandle zh, int final) zh->reg->key_buf_used = 0; } -void extract_add_index_string (RecWord *p, const char *str, int length) +void extract_add_it_key (ZebraHandle zh, + int reg_type, + const char *str, int slen, struct it_key *key) { char *dst; - ZebraHandle zh = p->extractCtrl->handle; struct recKeys *keys = &zh->reg->keys; - struct it_key key; - const char *src = (char*) &key; + const char *src = (char*) key; if (keys->buf_used+1024 > keys->buf_max) { @@ -1424,26 +1436,35 @@ void extract_add_index_string (RecWord *p, const char *str, int length) } dst = keys->buf + keys->buf_used; - key.len = 3; - key.mem[0] = p->attrSet; - key.mem[1] = p->attrUse; - key.mem[2] = p->seqno; - -#if 0 - /* just for debugging .. */ - yaz_log(YLOG_LOG, "set=%d use=%d seqno=%d", p->attrSet, p->attrUse, - p->seqno); -#endif - iscz1_encode(keys->codec_handle, &dst, &src); - *dst++ = p->reg_type; - memcpy (dst, str, length); - dst += length; + *dst++ = reg_type; + memcpy (dst, str, slen); + dst += slen; *dst++ = '\0'; keys->buf_used = dst - keys->buf; } +void extract_add_index_string (RecWord *p, const char *str, int length) +{ + struct it_key key; + key.len = 4; + key.mem[0] = p->attrSet * 65536 + p->attrUse; + key.mem[1] = p->record_id; + key.mem[2] = p->section_id; + key.mem[3] = p->seqno; + +#if 1 + /* just for debugging .. */ + yaz_log(YLOG_LOG, "add: set=%d use=%d " + "record_id=%lld section_id=%lld seqno=%lld", + p->attrSet, p->attrUse, p->record_id, p->section_id, p->seqno); +#endif + + extract_add_it_key(p->extractCtrl->handle, p->reg_type, str, + length, &key); +} + static void extract_add_sort_string (RecWord *p, const char *str, int length) { @@ -1601,7 +1622,7 @@ yaz_log(YLOG_DEBUG, "Complete field, w='%.*s'", p->length, p->string); void extract_token_add (RecWord *p) { WRBUF wrbuf; -#if 0 +#if 1 yaz_log (YLOG_LOG, "token_add " "reg_type=%c attrSet=%d attrUse=%d seqno=%d s=%.*s", p->reg_type, p->attrSet, p->attrUse, p->seqno, p->length, diff --git a/index/index.h b/index/index.h index 2af7982..6f005d8 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.125 2004-11-29 21:45:11 adam Exp $ +/* $Id: index.h,v 1.126 2004-11-29 21:55:25 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -182,13 +182,6 @@ char *encode_key_int (int d, char *bp); void encode_key_write (char *k, struct encode_info *i, FILE *outf); void encode_key_flush (struct encode_info *i, FILE *outf); -typedef struct { - char *term; - char *db; - zint sysno; - int score; -} *ZebraPosSet; - typedef struct zebra_set *ZebraSet; typedef struct zebra_rank_class { @@ -368,12 +361,6 @@ ZebraSet resultSetAddRPN (ZebraHandle zh, NMEM m, Z_RPNQuery *rpn, RSET resultSetRef (ZebraHandle zh, const char *resultSetId); void resultSetDestroy (ZebraHandle zh, int num_names, char **names, int *statuses); - - -ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, - int num, int *positions); -void zebraPosSetDestroy (ZebraHandle zh, ZebraPosSet records, int num); - void resultSetSort (ZebraHandle zh, NMEM nmem, int num_input_setnames, const char **input_setnames, const char *output_setname, diff --git a/index/rank1.c b/index/rank1.c index df7b7c1..c79edea 100644 --- a/index/rank1.c +++ b/index/rank1.c @@ -1,5 +1,5 @@ -/* $Id: rank1.c,v 1.20 2004-11-04 13:09:06 heikki Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 +/* $Id: rank1.c,v 1.21 2004-11-29 21:55:25 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps This file is part of the Zebra server. @@ -20,8 +20,6 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - - #include #include #ifdef WIN32 diff --git a/index/zebraapi.c b/index/zebraapi.c index 0bd24c2..377ab7b 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -1,4 +1,4 @@ -/* $Id: zebraapi.c,v 1.141 2004-11-29 21:45:11 adam Exp $ +/* $Id: zebraapi.c,v 1.142 2004-11-29 21:55:27 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -851,8 +851,9 @@ int zebra_records_retrieve (ZebraHandle zh, ODR stream, oid_value input_format, int num_recs, ZebraRetrievalRecord *recs) { - ZebraPosSet poset; - int i, *pos_array, ret = 0; + ZebraMetaRecord *poset; + int i, ret = 0; + zint *pos_array; ASSERTZH; assert(stream); assert(setname); @@ -865,7 +866,7 @@ int zebra_records_retrieve (ZebraHandle zh, ODR stream, if (!zh->res) { zh->errCode = 30; - zh->errString = odr_strdup (stream, setname); + zh->errString = odr_strdup(stream, setname); return -1; } @@ -874,10 +875,10 @@ int zebra_records_retrieve (ZebraHandle zh, ODR stream, if (zebra_begin_read (zh)) return -1; - pos_array = (int *) xmalloc (num_recs * sizeof(*pos_array)); + pos_array = (zint *) xmalloc(num_recs * sizeof(*pos_array)); for (i = 0; i 0) + { + recs[i].buf = (char*) odr_malloc(stream, len); + memcpy(recs[i].buf, buf, len); + } + else + recs[i].buf = buf; recs[i].score=poset[i].score; recs[i].sysno=poset[i].sysno; } @@ -913,14 +922,14 @@ int zebra_records_retrieve (ZebraHandle zh, ODR stream, { char num_str[20]; - sprintf (num_str, "%d", pos_array[i]); + sprintf (num_str, ZINT_FORMAT, pos_array[i]); zh->errCode = 13; zh->errString = odr_strdup (stream, num_str); ret = -1; break; } } - zebraPosSetDestroy (zh, poset, num_recs); + zebra_meta_records_destroy(zh, poset, num_recs); } zebra_end_read (zh); xfree (pos_array); @@ -1945,94 +1954,17 @@ int zebra_set_shadow_enable (ZebraHandle zh, int value) return 0; } -/* almost the same as zebra_records_retrieve ... but how did it work? - I mean for multiple records ??? CHECK ??? */ +/* Used by Perl API.. Added the record buffer dup to zebra_records_retrieve + so that it's identicical to the original api_records_retrieve */ void api_records_retrieve (ZebraHandle zh, ODR stream, const char *setname, Z_RecordComposition *comp, oid_value input_format, int num_recs, ZebraRetrievalRecord *recs) { - ZebraPosSet poset; - int i, *pos_array; - ASSERTZH; - assert(stream); - assert(setname); - assert(comp); - assert(recs); - assert(num_recs>0); - yaz_log(log_level,"api_records_retrieve s=%s n=%d",setname,num_recs); - - if (!zh->res) - { - zh->errCode = 30; - zh->errString = odr_strdup (stream, setname); - return; - } - - zh->errCode = 0; - - if (zebra_begin_read (zh)) - return; - - pos_array = (int *) xmalloc (num_recs * sizeof(*pos_array)); - for (i = 0; ierrCode = 30; - zh->errString = nmem_strdup (stream->mem, setname); - } - else - { - for (i = 0; ierrCode = 13; - zh->errString = odr_strdup (stream, num_str); - break; - } - - } - zebraPosSetDestroy (zh, poset, num_recs); - } - zebra_end_read (zh); - xfree (pos_array); + zebra_records_retrieve(zh, stream, setname, comp, input_format, + num_recs, recs); } - /* --------------------------------------------------------------------------- Record insert(=update), delete diff --git a/index/zrpn.c b/index/zrpn.c index 0b616ca..e9fc915 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,4 +1,4 @@ -/* $Id: zrpn.c,v 1.162 2004-11-26 12:20:48 adam Exp $ +/* $Id: zrpn.c,v 1.163 2004-11-29 21:55:27 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -2243,10 +2243,10 @@ static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs, } -RSET rpn_search (ZebraHandle zh, NMEM nmem, NMEM rset_nmem, - Z_RPNQuery *rpn, int num_bases, char **basenames, - const char *setname, - ZebraSet sset) +RSET rpn_search(ZebraHandle zh, NMEM nmem, NMEM rset_nmem, + Z_RPNQuery *rpn, int num_bases, char **basenames, + const char *setname, + ZebraSet sset) { RSET rset; oident *attrset; diff --git a/index/zsets.c b/index/zsets.c index cbdc8b1..6fe00c4 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.71 2004-11-19 10:27:09 heikki Exp $ +/* $Id: zsets.c,v 1.72 2004-11-29 21:55:28 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -137,60 +137,6 @@ void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type, (s->hits)++; } -#if 0 /* FIXME - Delete this, we don't count terms no more */ -int zebra_resultSetTerms (ZebraHandle zh, const char *setname, - int no, zint *count, - int *type, char *out, size_t *len) -{ - ZebraSet s = resultSetGet (zh, setname); - int no_max = 0; - - if (count) - *count = 0; - if (!s || !s->rset) - return 0; - no_max = s->rset->no_rset_terms; - if (no < 0 || no >= no_max) - return 0; - if (count) - *count = s->rset->rset_terms[no]->count; - if (type) - *type = s->rset->rset_terms[no]->type; - - if (out) - { - char *inbuf = s->rset->rset_terms[no]->name; - size_t inleft = strlen(inbuf); - size_t outleft = *len - 1; - int converted = 0; - - if (zh->iconv_from_utf8 != 0) - { - char *outbuf = out; - size_t ret; - - ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft, - &outbuf, &outleft); - if (ret == (size_t)(-1)) - *len = 0; - else - *len = outbuf - out; - converted = 1; - } - if (!converted) - { - if (inleft > outleft) - inleft = outleft; - *len = inleft; - memcpy (out, inbuf, *len); - } - out[*len] = 0; - } - return no_max; -} - -#endif - ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) { ZebraSet s; @@ -332,11 +278,11 @@ void resultSetDestroy (ZebraHandle zh, int num, char **names,int *statuses) } } -ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, - int num, int *positions) +ZebraMetaRecord *zebra_meta_records_create (ZebraHandle zh, const char *name, + int num, zint *positions) { ZebraSet sset; - ZebraPosSet sr = 0; + ZebraMetaRecord *sr = 0; RSET rset; int i; struct zset_sort_info *sort_info; @@ -347,7 +293,7 @@ ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, { if (!sset->term_entries) return 0; - sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num); + sr = (ZebraMetaRecord *) xmalloc (sizeof(*sr) * num); for (i = 0; icalc) (handle, psysno); @@ -863,9 +812,9 @@ void zebraRankDestroy (struct zebra_register *reg) ZebraRankClass p_next = p->next; if (p->init_flag && p->control->destroy) (*p->control->destroy)(reg, p->class_handle); - xfree (p->control->name); - xfree (p->control); - xfree (p); + xfree(p->control->name); + xfree(p->control); + xfree(p); p = p_next; } reg->rank_classes = NULL; diff --git a/recctrl/Makefile.am b/recctrl/Makefile.am index 7ed19b9..4945e56 100644 --- a/recctrl/Makefile.am +++ b/recctrl/Makefile.am @@ -1,4 +1,4 @@ -## $Id: Makefile.am,v 1.14 2004-09-28 12:52:03 adam Exp $ +## $Id: Makefile.am,v 1.15 2004-11-29 21:55:28 adam Exp $ common_libs = libidzebra-recctrl.la \ ../data1/libidzebra-data1.la \ @@ -27,12 +27,18 @@ mod_grs_danbib_la_LDFLAGS = -rpath $(pkglibdir) -module -avoid-version mod_grs_danbib_la_LADD = mod_grs_danbib_la_LIBADD = $(common_libs) $(mod_grs_danbib_la_LADD) +mod_safari_la_SOURCES = safari.c +mod_safari_la_LDFLAGS = -rpath $(pkglibdir) -module -avoid-version +mod_safari_la_LADD = +mod_safari_la_LIBADD = $(common_libs) $(mod_safari_la_LADD) + pkglib_LTLIBRARIES = $(SHARED_MODULE_LA) EXTRA_LTLIBRARIES = \ mod-grs-regx.la \ mod-grs-xml.la \ mod-grs-marc.la \ - mod-grs-danbib.la + mod-grs-danbib.la \ + mod-safari.la # The common library lib_LTLIBRARIES = libidzebra-recctrl.la @@ -49,6 +55,7 @@ EXTRA_libidzebra_recctrl_la_SOURCES = \ $(mod_grs_regx_la_SOURCES) \ $(mod_grs_xml_la_SOURCES) \ $(mod_grs_marc_la_SOURCES) \ - $(mod_grs_danbib_la_SOURCES) + $(mod_grs_danbib_la_SOURCES) \ + $(mod_safari_la_SOURCES) AM_CPPFLAGS = -I$(srcdir)/../include $(YAZINC) $(TCL_INCLUDE) -DDEFAULT_MODULE_PATH=\"$(pkglibdir)\" diff --git a/recctrl/recctrl.c b/recctrl/recctrl.c index c4dbdbc..a19874f 100644 --- a/recctrl/recctrl.c +++ b/recctrl/recctrl.c @@ -1,4 +1,4 @@ -/* $Id: recctrl.c,v 1.13 2004-11-19 10:27:12 heikki Exp $ +/* $Id: recctrl.c,v 1.14 2004-11-29 21:55:28 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -65,6 +65,9 @@ struct recTypes { #ifdef IDZEBRA_STATIC_GRS_DANBIB extern RecType idzebra_filter_grs_danbib[]; #endif +#ifdef IDZEBRA_STATIC_SAFARI + extern RecType idzebra_filter_safari[]; +#endif static void recTypeClass_add (struct recTypeClass **rts, RecType *rt, NMEM nmem, void *module_handle); @@ -94,6 +97,9 @@ RecTypeClass recTypeClass_create (Res res, NMEM nmem) #ifdef IDZEBRA_STATIC_GRS_DANBIB recTypeClass_add (&rts, idzebra_filter_grs_danbib, nmem, 0); #endif +#ifdef IDZEBRA_STATIC_SAFARI + recTypeClass_add (&rts, idzebra_filter_safari, nmem, 0); +#endif if (module_path) { diff --git a/recctrl/safari.c b/recctrl/safari.c new file mode 100644 index 0000000..a21179f --- /dev/null +++ b/recctrl/safari.c @@ -0,0 +1,264 @@ +/* $Id: safari.c,v 1.1 2004-11-29 21:55:28 adam Exp $ + Copyright (C) 2004 + Index Data Aps + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ + + +#include +#include +#include + +#include +#include + +struct safari_info { + char *sep; +}; + +static void *safari_init (Res res, RecType recType) +{ + struct safari_info *tinfo = (struct safari_info *) xmalloc(sizeof(*tinfo)); + tinfo->sep = 0; + return tinfo; +} + +static void safari_config(void *clientData, Res res, const char *args) +{ + +} + +static void safari_destroy(void *clientData) +{ + struct safari_info *tinfo = clientData; + xfree (tinfo->sep); + xfree (tinfo); +} + +struct fi_info { + struct recExtractCtrl *p; + char *buf; + int offset; + int max; +}; + +struct fi_info *fi_open(struct recExtractCtrl *p) +{ + struct fi_info *fi = (struct fi_info *) xmalloc (sizeof(*fi)); + + fi->p = p; + fi->buf = (char *) xmalloc (4096); + fi->offset = 1; + fi->max = 1; + return fi; +} + +int fi_getchar(struct fi_info *fi, char *dst) +{ + if (fi->offset >= fi->max) + { + if (fi->max <= 0) + return 0; + fi->max = (*fi->p->readf)(fi->p->fh, fi->buf, 4096); + fi->offset = 0; + if (fi->max <= 0) + return 0; + } + *dst = fi->buf[(fi->offset)++]; + return 1; +} + +int fi_gets(struct fi_info *fi, char *dst, int max) +{ + int l; + for (l = 0; l < max; l++) + { + if (!fi_getchar(fi, dst+l)) + return 0; + if (dst[l] == '\n') + break; + } + dst[l] = '\0'; + return 1; +} + +void fi_close (struct fi_info *fi) +{ + xfree (fi->buf); + xfree (fi); +} + +static int safari_extract(void *clientData, struct recExtractCtrl *p) +{ + struct safari_info *tinfo = clientData; + char line[512]; + RecWord recWord; + struct fi_info *fi = fi_open(p); + +#if 0 + yaz_log(YLOG_LOG, "safari_extract off=%ld", + (long) (*fi->p->tellf)(fi->p->fh)); +#endif + xfree(tinfo->sep); + tinfo->sep = 0; + (*p->init)(p, &recWord); + + if (!fi_gets(fi, line, sizeof(line)-1)) + return RECCTRL_EXTRACT_ERROR_GENERIC; + sscanf(line, "%255s", p->match_criteria); + + recWord.reg_type = 'w'; + while (fi_gets(fi, line, sizeof(line)-1)) + { + int nor = 0; + char field[40]; + char *cp; + yaz_log(YLOG_LOG, "safari line: %s", line); + if (sscanf(line, "%lld %lld %lld %39s %n", + &recWord.record_id, &recWord.section_id, &recWord.seqno, + field, &nor) < 4) + { + yaz_log(YLOG_WARN, "Bad safari record line: %s", line); + return RECCTRL_EXTRACT_ERROR_GENERIC; + } + for (cp = line + nor; *cp == ' '; cp++) + ; + recWord.string = cp; + recWord.length = strlen(cp); + (*p->tokenAdd)(&recWord); + } + fi_close(fi); + return RECCTRL_EXTRACT_OK; +} + +static int safari_retrieve (void *clientData, struct recRetrieveCtrl *p) +{ + int r, safari_ptr = 0; + static char *safari_buf = NULL; + static int safari_size = 0; + int make_header = 1; + int make_body = 1; + const char *elementSetName = NULL; + int no_lines = 0; + + if (p->comp && p->comp->which == Z_RecordComp_simple && + p->comp->u.simple->which == Z_ElementSetNames_generic) + elementSetName = p->comp->u.simple->u.generic; + + if (elementSetName) + { + /* don't make header for the R(aw) element set name */ + if (!strcmp(elementSetName, "R")) + { + make_header = 0; + make_body = 1; + } + /* only make header for the H(eader) element set name */ + else if (!strcmp(elementSetName, "H")) + { + make_header = 1; + make_body = 0; + } + } + while (1) + { + if (safari_ptr + 4096 >= safari_size) + { + char *nb; + + safari_size = 2*safari_size + 8192; + nb = (char *) xmalloc (safari_size); + if (safari_buf) + { + memcpy (nb, safari_buf, safari_ptr); + xfree (safari_buf); + } + safari_buf = nb; + } + if (make_header && safari_ptr == 0) + { + if (p->score >= 0) + { + sprintf (safari_buf, "Rank: %d\n", p->score); + safari_ptr = strlen(safari_buf); + } + sprintf (safari_buf + safari_ptr, "Local Number: " ZINT_FORMAT "\n", + p->localno); + safari_ptr = strlen(safari_buf); + if (p->fname) + { + sprintf (safari_buf + safari_ptr, "Filename: %s\n", p->fname); + safari_ptr = strlen(safari_buf); + } + strcpy(safari_buf+safari_ptr++, "\n"); + } + if (!make_body) + break; + r = (*p->readf)(p->fh, safari_buf + safari_ptr, 4096); + if (r <= 0) + break; + safari_ptr += r; + } + safari_buf[safari_ptr] = '\0'; + if (elementSetName) + { + if (!strcmp (elementSetName, "B")) + no_lines = 4; + if (!strcmp (elementSetName, "M")) + no_lines = 20; + } + if (no_lines) + { + char *p = safari_buf; + int i = 0; + + while (++i <= no_lines && (p = strchr (p, '\n'))) + p++; + if (p) + { + p[1] = '\0'; + safari_ptr = p-safari_buf; + } + } + p->output_format = VAL_SUTRS; + p->rec_buf = safari_buf; + p->rec_len = safari_ptr; + return 0; +} + +static struct recType safari_type = { + "safari", + safari_init, + safari_config, + safari_destroy, + safari_extract, + safari_retrieve +}; + +RecType +#ifdef IDZEBRA_STATIC_SAFARI +idzebra_filter_safari +#else +idzebra_filter +#endif + +[] = { + &safari_type, + 0, +}; diff --git a/test/api/Makefile.am b/test/api/Makefile.am index fb5af8d..d79b4f5 100644 --- a/test/api/Makefile.am +++ b/test/api/Makefile.am @@ -1,32 +1,39 @@ -# $Id: Makefile.am,v 1.25 2004-11-01 11:45:39 heikki Exp $ +# $Id: Makefile.am,v 1.26 2004-11-29 21:55:28 adam Exp $ noinst_PROGRAMS = testclient testclient_SOURCES = testclient.c simpletests = t1 t2 t3 t4 t5 t6 t7 t8 t9 t10 xpathtests= xpath1 xpath2 xpath3 xpath4 xpath5 -#check_PROGRAMS = t1 t2 t3 t4 t5 t6 t7 t8 t9 t10 xpath1 -check_PROGRAMS = ${simpletests} ${xpathtests} +safaritests = safari1 +check_PROGRAMS = $(simpletests) $(xpathtests) $(safaritests) TESTS = $(check_PROGRAMS) -EXTRA_DIST=zebra.cfg zebra6.cfg zebra8.cfg zebrazv.cfg zebraxpath.cfg - -t1_SOURCES = t1.c testlib.c testlib.h -t2_SOURCES = t2.c testlib.c testlib.h -t3_SOURCES = t3.c testlib.c testlib.h -t4_SOURCES = t4.c testlib.c testlib.h -t5_SOURCES = t5.c testlib.c testlib.h -t6_SOURCES = t6.c testlib.c testlib.h -t7_SOURCES = t7.c testlib.c testlib.h -t8_SOURCES = t8.c testlib.c testlib.h -t9_SOURCES = t9.c testlib.c testlib.h rankingrecords.h -t10_SOURCES = t10.c testlib.c testlib.h rankingrecords.h - -xpath1_SOURCES = xpath1.c testlib.c testlib.h -xpath2_SOURCES = xpath2.c testlib.c testlib.h -xpath3_SOURCES = xpath3.c testlib.c testlib.h -xpath4_SOURCES = xpath4.c testlib.c testlib.h -xpath5_SOURCES = xpath5.c testlib.c testlib.h +EXTRA_DIST=zebra.cfg zebra6.cfg zebra8.cfg zebrazv.cfg \ + zebraxpath.cfg safari.cfg + +noinst_LIBRARIES = libtestlib.a + +libtestlib_a_SOURCES = testlib.c testlib.h + +t1_SOURCES = t1.c +t2_SOURCES = t2.c +t3_SOURCES = t3.c +t4_SOURCES = t4.c +t5_SOURCES = t5.c +t6_SOURCES = t6.c +t7_SOURCES = t7.c +t8_SOURCES = t8.c +t9_SOURCES = t9.c rankingrecords.h +t10_SOURCES = t10.c rankingrecords.h + +xpath1_SOURCES = xpath1.c +xpath2_SOURCES = xpath2.c +xpath3_SOURCES = xpath3.c +xpath4_SOURCES = xpath4.c +xpath5_SOURCES = xpath5.c + +safari1_SOURCES = safari1.c testlib.c AM_CPPFLAGS = -I$(top_srcdir)/include $(YAZINC) @@ -43,5 +50,5 @@ zebralibs = \ ../../dfa/libidzebra-dfa.la \ ../../util/libidzebra-util.la -LDADD = $(zebralibs) $(YAZLALIB) +LDADD = libtestlib.a $(zebralibs) $(YAZLALIB) diff --git a/test/api/safari.cfg b/test/api/safari.cfg new file mode 100644 index 0000000..4e98532 --- /dev/null +++ b/test/api/safari.cfg @@ -0,0 +1,9 @@ +# $Id: safari.cfg,v 1.1 2004-11-29 21:55:28 adam Exp $ +profilepath: ${srcdir:-.}/../../tab + +attset: bib1.att + +recordType: safari + +isam: b + diff --git a/test/api/safari1.c b/test/api/safari1.c new file mode 100644 index 0000000..35c1051 --- /dev/null +++ b/test/api/safari1.c @@ -0,0 +1,62 @@ +/* $Id: safari1.c,v 1.1 2004-11-29 21:55:28 adam Exp $ + Copyright (C) 2004 + Index Data Aps + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Zebra; see the file LICENSE.zebra. If not, write to the +Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. +*/ + +/* safari1 - insert a few Safari records */ + +#include "testlib.h" + +const char *myrec[] = { + "1234\n" /* first record */ + "00024338 125060 1 any the\n" + "00024338 125060 2 any art\n" + "00024338 125060 3 any of\n", + + "5678\n" /* other record */ + "00024339 125060 1 any den\n" + "00024339 125060 2 any gamle\n" + "00024339 125060 3 any mand\n", + + "5678\n" /* same record identifier as before .. */ + "00024339 125060 1 any the\n" + "00024339 125060 2 any gamle\n" + "00024339 125060 3 any mand\n", + + 0}; + +int main(int argc, char **argv) +{ + zint ids[2]; + ZebraService zs = start_up("safari.cfg", argc, argv); + + ZebraHandle zh = zebra_open (zs); + + init_data(zh, myrec); + do_query(__LINE__, zh, "@attr 1=1016 the", 1); + do_query(__LINE__, zh, "@attr 1=1016 {the art}", 1); + do_query(__LINE__, zh, "@attr 1=1016 {den gamle}", 1); + do_query(__LINE__, zh, "@attr 1=1016 {the of}", 0); + + ids[0] = 24338; + meta_query(__LINE__, zh, "@attr 1=1016 the", 1, ids); + + return close_down(zh, zs, 0); +} diff --git a/test/api/testlib.c b/test/api/testlib.c index 5118284..b9c1b1e 100644 --- a/test/api/testlib.c +++ b/test/api/testlib.c @@ -1,4 +1,4 @@ -/* $Id: testlib.c,v 1.5 2004-11-19 10:27:15 heikki Exp $ +/* $Id: testlib.c,v 1.6 2004-11-29 21:55:28 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps @@ -102,15 +102,15 @@ int close_down(ZebraHandle zh, ZebraService zs, int retcode) /** inits the database and inserts test data */ -void init_data( ZebraHandle zh, const char **recs) +void init_data(ZebraHandle zh, const char **recs) { int i; char *addinfo; assert(zh); zebra_select_database(zh, "Default"); - yaz_log(log_level,"going to call init"); - i=zebra_init(zh); - yaz_log(log_level,"init returned %d",i); + yaz_log(log_level, "going to call init"); + i = zebra_init(zh); + yaz_log(log_level, "init returned %d",i); if (i) { printf("init failed with %d\n",i); @@ -129,8 +129,6 @@ void init_data( ZebraHandle zh, const char **recs) } - - int do_query(int lineno, ZebraHandle zh, char *query, int exphits) { ODR odr; @@ -151,7 +149,7 @@ int do_query(int lineno, ZebraHandle zh, char *query, int exphits) printf("Error: Parse failed \n%s\n",query); exit(1); } - rc=zebra_search_RPN (zh, odr, rpn, setname, &hits); + rc = zebra_search_RPN (zh, odr, rpn, setname, &hits); if (rc) { printf("Error: search returned %d \n%s\n",rc,query); exit (1); @@ -182,15 +180,16 @@ void ranking_query(int lineno, ZebraHandle zh, char *query, int rc; int i; - hits=do_query(lineno, zh, query, exphits); + hits = do_query(lineno, zh, query, exphits); for (i = 0; i<10; i++) retrievalRecord[i].position = i+1; - rc=zebra_records_retrieve (zh, odr_output, setname, 0, - VAL_TEXT_XML, hits, retrievalRecord); - - if (rc) { + rc = zebra_records_retrieve (zh, odr_output, setname, 0, + VAL_TEXT_XML, hits, retrievalRecord); + + if (rc) + { printf("Error: retrieve returned %d \n%s\n",rc,query); exit (1); } @@ -206,9 +205,47 @@ void ranking_query(int lineno, ZebraHandle zh, char *query, if (retrievalRecord[0].score != firstscore) { printf("Error: first rec got score %d instead of %d\n", - retrievalRecord[0].score, firstscore); + retrievalRecord[0].score, firstscore); exit(1); } odr_destroy (odr_output); } +void meta_query(int lineno, ZebraHandle zh, char *query, int exphits, + zint *ids) +{ + ZebraMetaRecord *meta; + ODR odr_output = odr_createmem (ODR_ENCODE); + const char *setname="rsetname"; + zint *positions = (zint *) malloc(1 + (exphits * sizeof(zint))); + int hits; + int rc; + int i; + + hits = do_query(lineno, zh, query, exphits); + + for (i = 0; i>8)) + if (key.mem[0] != (i<<8)) { printf ("%s: i=%d mem[0]=" ZINT_FORMAT " expected " "%d\n", prog, i, key.mem[0], i>>8); @@ -108,6 +108,43 @@ int tst_encode(int num) return 0; } +void tstcodec1() +{ + char buf[100]; + char *dst = buf; + const char *src; + struct it_key key1, key2; + void *codec_handle =iscz1_start(); + + key1.len = 4; + key1.mem[0] = 4*65536+1016; + key1.mem[1] = 24339; + key1.mem[2] = 125060; + key1.mem[3] = 1; + + src = (char*) &key1; + dst = buf; + iscz1_encode(codec_handle, &dst, &src); + + iscz1_stop(codec_handle); + + codec_handle =iscz1_start(); + + dst = (char*) &key2; + src = buf; + + iscz1_decode(codec_handle, &dst, &src); + + iscz1_stop(codec_handle); + + if (memcmp(&key1, &key2, sizeof(key1))) + { + printf ("keys differ in tstcodec1\n"); + exit(1); + } +} + + int main(int argc, char **argv) { int num = 0; @@ -116,6 +153,7 @@ int main(int argc, char **argv) num = atoi(argv[1]); if (num < 1 || num > 100000000) num = 10000; + tstcodec1(); exit(tst_encode(num)); } -- 1.7.10.4