From: Adam Dickmeiss Date: Fri, 3 May 2002 13:50:24 +0000 (+0000) Subject: data1 cleanup X-Git-Tag: ZEBRA.1.3.0~66 X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=commitdiff_plain;h=3ecabdd7340cf895d4d446a217b8b69c17a2d781;ds=sidebyside data1 cleanup --- diff --git a/TODO b/TODO index 55848cd..f85e221 100644 --- a/TODO +++ b/TODO @@ -1,39 +1,37 @@ Zebra TODO -$Id: TODO,v 1.10 2000-12-05 10:01:44 adam Exp $ +$Id: TODO,v 1.11 2002-05-03 13:50:24 adam Exp $ -Explain category Attributedetails should include name/description -of search- attrribute. +Make test scripts. -String indexing/search (possibly including XPATH-like queres) +Add XML parser (EXPAT). -Null-value search. +Ensure that UTF-8 indexing works and that EXPAT using +other character set is converted to it. -Handle XML empty node constructs +Handle free lists for isamb (possibly others). -Browse set. +Check that recordId: actually works. -Multiple register areas. +Finish X-path-like search including attributes. -Hit Vector for each term in query in search-response PDU. +Make "fake" absyn even though no-one is found and warn +first time. Make directive to enable/disable xpath-indexing. -Prefix configuration for externally stored records. +For traditional searches, allow string attributes that matches +name in *.att-file. -Size of sort entries should be configurable. +Explain category Attributedetails should include name/description +of search- attrribute. -Use first field in sorting, i.e. author. +Prefix configuration for externally stored records. System number sorting. Configurable default sorting criteria - used when query is not ranked. -Configurable default search attributes. - -Better ranking in searches. Admin should be able specify initial - weight to certain fields. +Use first field in sorting, i.e. author. -ISAMC optimization: indirect block with pointers to all blocks - in chain. The initial block should include the count as well. +Size of sort entries should be configurable. -Create surrogate diagnostic when ONE record is missing insead of a -non-surrogate diagnostic. +Configurable default search attributes. diff --git a/recctrl/marcread.c b/recctrl/marcread.c index 7e072aa..4afdd7a 100644 --- a/recctrl/marcread.c +++ b/recctrl/marcread.c @@ -1,47 +1,9 @@ /* - * Copyright (C) 1997-1999, Index Data + * Copyright (C) 1997-2002, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * - * $Log: marcread.c,v $ - * Revision 1.12 2002-04-09 14:36:53 adam - * Fix XML attributes for MARC reader - * - * Revision 1.11 2000/05/15 15:32:51 adam - * Added support for 64 bit input file support. - * - * Revision 1.10 1999/11/30 13:48:04 adam - * Improved installation. Updated for inclusion of YAZ header files. - * - * Revision 1.9 1999/06/25 13:47:25 adam - * Minor change that prevents MSVC warning. - * - * Revision 1.8 1999/05/26 07:49:14 adam - * C++ compilation. - * - * Revision 1.7 1999/05/20 12:57:18 adam - * Implemented TCL filter. Updated recctrl system. - * - * Revision 1.6 1999/02/02 14:51:27 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.5 1997/11/18 10:03:24 adam - * Member num_children removed from data1_node. - * - * Revision 1.4 1997/10/27 14:34:26 adam - * Fixed bug - data1 root node wasn't tagged at all! - * - * Revision 1.3 1997/09/24 13:36:51 adam - * *** empty log message *** - * - * Revision 1.2 1997/09/17 12:19:21 adam - * Zebra version corresponds to YAZ version 1.4. - * Changed Zebra server so that it doesn't depend on global common_resource. - * - * Revision 1.1 1997/09/04 13:54:40 adam - * Added MARC filter - type grs.marc. where syntax refers - * to abstract syntax. New method tellf in retrieve/extract method. - * + * $Id: marcread.c,v 1.13 2002-05-03 13:50:25 adam Exp $ */ #include #include @@ -52,88 +14,6 @@ #include #include "grsread.h" -data1_node *data1_mk_node_wp (data1_handle dh, NMEM mem, data1_node *parent) -{ - data1_node *res = data1_mk_node (dh, mem); - - if (!parent) - res->root = res; - else - { - res->root = parent->root; - res->parent = parent; - if (!parent->child) - parent->child = parent->last_child = res; - else - parent->last_child->next = res; - parent->last_child = res; - } - return res; -} - -static void destroy_data (struct data1_node *n) -{ - assert (n->which == DATA1N_data); - xfree (n->u.data.data); -} - -data1_node *data1_mk_node_text (data1_handle dh, NMEM mem, data1_node *parent, - const char *buf, size_t len) -{ - data1_node *res = data1_mk_node_wp (dh, mem, parent); - res->which = DATA1N_data; - res->u.data.formatted_text = 0; - res->u.data.what = DATA1I_text; - res->u.data.len = len; - if (res->u.data.len > DATA1_LOCALDATA) { - res->u.data.data = (char *) xmalloc (res->u.data.len); - res->destroy = destroy_data; - } - else - res->u.data.data = res->lbuf; - memcpy (res->u.data.data, buf, res->u.data.len); - return res; -} - -data1_node *data1_mk_node_tag (data1_handle dh, NMEM mem, data1_node *parent, - const char *tag, size_t len) -{ - data1_element *elem = NULL; - data1_node *partag = get_parent_tag(dh, parent); - data1_node *res; - data1_element *e = NULL; - int localtag = 0; - - res = data1_mk_node_wp (dh, mem, parent); - - res->which = DATA1N_tag; - res->u.tag.tag = res->lbuf; - res->u.tag.get_bytes = -1; -#if DATA1_USING_XATTR - res->u.tag.attributes = 0; -#endif - - if (len >= DATA1_LOCALDATA) - len = DATA1_LOCALDATA-1; - - memcpy (res->u.tag.tag, tag, len); - res->u.tag.tag[len] = '\0'; - - if (parent->which == DATA1N_variant) - return res; - if (partag) - if (!(e = partag->u.tag.element)) - localtag = 1; - - elem = data1_getelementbytagname (dh, res->root->u.root.absyn, e, - res->u.tag.tag); - res->u.tag.element = elem; - res->u.tag.node_selected = 0; - res->u.tag.make_variantlist = 0; - res->u.tag.no_data_requested = 0; - return res; -} - #define MARC_DEBUG 0 data1_node *grs_read_marc (struct grs_read_info *p) @@ -153,7 +33,6 @@ data1_node *grs_read_marc (struct grs_read_info *p) #endif data1_node *res_root; - data1_absyn *absyn; char *absynName; data1_marctab *marctab; @@ -181,19 +60,14 @@ data1_node *grs_read_marc (struct grs_read_info *p) (*p->endf)(p->fh, cur_offset - 1); } absynName = p->type; - logf (LOG_DEBUG, "absynName = %s", absynName); - if (!(absyn = data1_get_absyn (p->dh, absynName))) + res_root = data1_mk_root (p->dh, p->mem, absynName); + if (!res_root) { - logf (LOG_WARN, "Unknown abstract syntax: %s", absynName); - return NULL; + yaz_log (LOG_WARN, "cannot read MARC without an abstract syntax"); + return 0; } - res_root = data1_mk_node_wp (p->dh, p->mem, NULL); - res_root->which = DATA1N_root; - res_root->u.root.type = (char *) nmem_malloc (p->mem, strlen(absynName)+1); - strcpy (res_root->u.root.type, absynName); - res_root->u.root.absyn = absyn; - marctab = absyn->marc; + marctab = res_root->u.root.absyn->marc; if (marctab && marctab->force_indicator_length >= 0) indicator_length = marctab->force_indicator_length; @@ -205,7 +79,6 @@ data1_node *grs_read_marc (struct grs_read_info *p) identifier_length = atoi_n (buf+11, 1); base_address = atoi_n (buf+12, 4); - length_data_entry = atoi_n (buf+20, 1); length_starting = atoi_n (buf+21, 1); length_implementation = atoi_n (buf+22, 1); @@ -227,8 +100,9 @@ data1_node *grs_read_marc (struct grs_read_info *p) entry_p += 3; tag[3] = '\0'; + /* generate field node */ - res = data1_mk_node_tag (p->dh, p->mem, res_root, tag, 3); + res = data1_mk_tag_n (p->dh, p->mem, tag, 3, res_root); #if MARC_DEBUG fprintf (outf, "%s ", tag); @@ -246,8 +120,8 @@ data1_node *grs_read_marc (struct grs_read_info *p) #if MARC_DEBUG int j; #endif - res = data1_mk_node_tag (p->dh, p->mem, res, buf+i, - indicator_length); + res = data1_mk_tag_n (p->dh, p->mem, + buf+i, indicator_length, res); #if MARC_DEBUG for (j = 0; jdh, p->mem, parent, - buf+i+1, identifier_length-1); + data1_mk_tag_n (p->dh, p->mem, + buf+i+1, identifier_length-1, parent); #if MARC_DEBUG fprintf (outf, " $"); for (j = 1; jdh, p->mem, res, buf + i0, i - i0); + data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, res); i0 = i; } else @@ -293,9 +167,9 @@ data1_node *grs_read_marc (struct grs_read_info *p) } if (i > i0) { - data1_node *res = data1_mk_node_tag (p->dh, p->mem, - parent, "@", 1); - data1_mk_node_text (p->dh, p->mem, res, buf + i0, i - i0); + data1_node *res = data1_mk_tag_n (p->dh, p->mem, + "@", 1, parent); + data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, res); } #if MARC_DEBUG fprintf (outf, "\n"); diff --git a/recctrl/recgrs.c b/recctrl/recgrs.c index 5974888..d7af403 100644 --- a/recctrl/recgrs.c +++ b/recctrl/recgrs.c @@ -3,7 +3,10 @@ * All rights reserved. * * $Log: recgrs.c,v $ - * Revision 1.46 2002-04-13 18:16:43 adam + * Revision 1.47 2002-05-03 13:50:25 adam + * data1 cleanup + * + * Revision 1.46 2002/04/13 18:16:43 adam * More XPATH work; common sequence numbers for extract keys * * Revision 1.45 2002/04/12 14:40:42 adam @@ -766,8 +769,7 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) data1_pr_tree (p->dh, node, stdout); #endif logf (LOG_DEBUG, "grs_retrieve: size"); - if ((dnew = data1_insert_taggeddata(p->dh, node, node, - "size", mem))) + if ((dnew = data1_mk_tag_data_wd(p->dh, node, node,"size", mem))) { dnew->u.data.what = DATA1I_text; dnew->u.data.data = dnew->lbuf; @@ -777,7 +779,7 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) tagname = res_get_def(p->res, "tagrank", "rank"); if (strcmp(tagname, "0") && p->score >= 0 && - (dnew = data1_insert_taggeddata(p->dh, node, node, tagname, mem))) + (dnew = data1_mk_tag_data_wd(p->dh, node, node, tagname, mem))) { logf (LOG_DEBUG, "grs_retrieve: %s", tagname); dnew->u.data.what = DATA1I_num; @@ -788,7 +790,7 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) tagname = res_get_def(p->res, "tagsysno", "localControlNumber"); if (strcmp(tagname, "0") && p->localno > 0 && - (dnew = data1_insert_taggeddata(p->dh, node, node, tagname, mem))) + (dnew = data1_mk_tag_data_wd(p->dh, node, node, tagname, mem))) { logf (LOG_DEBUG, "grs_retrieve: %s", tagname); dnew->u.data.what = DATA1I_text; @@ -797,6 +799,8 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) dnew->u.data.len = strlen(dnew->u.data.data); } + data1_pr_tree (p->dh, node, stdout); + if (p->comp && p->comp->which == Z_RecordComp_complex && p->comp->u.complex->generic && p->comp->u.complex->generic->schema) @@ -884,8 +888,8 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p) } *(p++) = '\0'; - if ((dnew = data1_insert_taggeddata(dh, node, node, - "schemaIdentifier", mem))) + if ((dnew = data1_mk_tag_data_wd(dh, node, node, + "schemaIdentifier", mem))) { dnew->u.data.what = DATA1I_oid; dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp); diff --git a/recctrl/regxread.c b/recctrl/regxread.c index 945c705..9f612e5 100644 --- a/recctrl/regxread.c +++ b/recctrl/regxread.c @@ -2,7 +2,7 @@ * Copyright (C) 1994-2002, Index Data * All rights reserved. * - * $Id: regxread.c,v 1.39 2002-04-15 09:07:10 adam Exp $ + * $Id: regxread.c,v 1.40 2002-05-03 13:50:25 adam Exp $ */ #include #include @@ -672,28 +672,14 @@ static void execData (struct lexSpec *spec, { org_len = 0; - res = data1_mk_node (spec->dh, spec->m); - res->parent = parent; - res->which = DATA1N_data; + res = data1_mk_node (spec->dh, spec->m, DATA1N_data, parent); res->u.data.what = DATA1I_text; res->u.data.len = 0; res->u.data.formatted_text = formatted_text; -#if 0 - if (elen > DATA1_LOCALDATA) - res->u.data.data = nmem_malloc (spec->m, elen); - else - res->u.data.data = res->lbuf; - memcpy (res->u.data.data, ebuf, elen); -#else res->u.data.data = 0; -#endif - res->root = parent->root; - parent->last_child = res; if (spec->d1_stack[spec->d1_level]) spec->d1_stack[spec->d1_level]->next = res; - else - parent->child = res; spec->d1_stack[spec->d1_level] = res; } if (org_len + elen >= spec->concatBuf[spec->d1_level].max) @@ -776,21 +762,9 @@ static void variantBegin (struct lexSpec *spec, if (parent->which != DATA1N_variant) { - res = data1_mk_node (spec->dh, spec->m); - res->parent = parent; - res->which = DATA1N_variant; - res->u.variant.type = 0; - res->u.variant.value = 0; - res->root = parent->root; - - parent->last_child = res; + res = data1_mk_node (spec->dh, spec->m, DATA1N_variant, parent); if (spec->d1_stack[spec->d1_level]) - { tagDataRelease (spec); - spec->d1_stack[spec->d1_level]->next = res; - } - else - parent->child = res; spec->d1_stack[spec->d1_level] = res; spec->d1_stack[++(spec->d1_level)] = NULL; } @@ -805,10 +779,7 @@ static void variantBegin (struct lexSpec *spec, logf (LOG_LOG, "variant node (%d)", spec->d1_level); #endif parent = spec->d1_stack[spec->d1_level-1]; - res = data1_mk_node (spec->dh, spec->m); - res->parent = parent; - res->which = DATA1N_variant; - res->root = parent->root; + res = data1_mk_node (spec->dh, spec->m, DATA1N_variant, parent); res->u.variant.type = tp; if (value_len >= DATA1_LOCALDATA) @@ -818,14 +789,8 @@ static void variantBegin (struct lexSpec *spec, res->u.variant.value = res->lbuf; - parent->last_child = res; if (spec->d1_stack[spec->d1_level]) - { tagDataRelease (spec); - spec->d1_stack[spec->d1_level]->next = res; - } - else - parent->child = res; spec->d1_stack[spec->d1_level] = res; spec->d1_stack[++(spec->d1_level)] = NULL; } @@ -863,8 +828,7 @@ static void tagBegin (struct lexSpec *spec, parent = spec->d1_stack[spec->d1_level -1]; partag = get_parent_tag(spec->dh, parent); - res = data1_mk_node_type (spec->dh, spec->m, DATA1N_tag); - res->parent = parent; + res = data1_mk_node (spec->dh, spec->m, DATA1N_tag, parent); if (len >= DATA1_LOCALDATA) res->u.tag.tag = (char *) nmem_malloc (spec->m, len+1); @@ -887,16 +851,9 @@ static void tagBegin (struct lexSpec *spec, spec->d1_stack[0]->u.root.absyn, e, res->u.tag.tag); res->u.tag.element = elem; - res->root = parent->root; - parent->last_child = res; if (spec->d1_stack[spec->d1_level]) - { tagDataRelease (spec); - spec->d1_stack[spec->d1_level]->next = res; - } - else - parent->child = res; spec->d1_stack[spec->d1_level] = res; spec->d1_stack[++(spec->d1_level)] = NULL; } @@ -1335,11 +1292,9 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) #endif absyn = data1_get_absyn (spec->dh, absynName); - res = data1_mk_node (spec->dh, spec->m); - res->which = DATA1N_root; + res = data1_mk_node (spec->dh, spec->m, DATA1N_root, 0); res->u.root.type = absynName; res->u.root.absyn = absyn; - res->root = res; spec->d1_stack[spec->d1_level] = res; spec->d1_stack[++(spec->d1_level)] = NULL;