X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frecgrs.c;h=500a0b141e1b8272c271848ee90b4611e943c3eb;hp=cb99ff3ad38c7304e1c5656fb8f970482d6294ed;hb=4815bf6068b5a2623c50475e0e5ec598709934e0;hpb=1b5ef8265837240930862dddd2d7ef963a0cc211 diff --git a/index/recgrs.c b/index/recgrs.c index cb99ff3..500a0b1 100644 --- a/index/recgrs.c +++ b/index/recgrs.c @@ -1,8 +1,5 @@ -/* $Id: recgrs.c,v 1.18 2007-04-16 21:54:37 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS - -This file is part of the Zebra server. +/* This file is part of the Zebra server. + Copyright (C) 1994-2010 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -107,12 +104,18 @@ static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd) return 0; sp_lex(sp); - if (wrd->term_buf && wrd->term_len) + if (wrd->term_buf) { - wrd->term_buf += start; - wrd->term_len -= start; - if (wrd->term_len > len) - wrd->term_len = len; + if (start >= wrd->term_len) + wrd->term_len = 0; + else + { + wrd->term_len -= start; + wrd->term_buf += start; + + if (wrd->term_len > len) + wrd->term_len = len; + } } return 1; } @@ -396,7 +399,7 @@ data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n) #endif char *pexpr = xmalloc(strlen(tagpath)+5); - sprintf (pexpr, "/%s\n", tagpath); + sprintf(pexpr, "/%s\n", tagpath); for (xpe = abs->xp_elements; xpe; xpe = xpe->next) xpe->match_state = -1; /* don't know if it matches yet */ @@ -418,7 +421,7 @@ data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n) xpe1->match_state = ok; #endif } - assert (ok == 0 || ok == 1); + assert(ok == 0 || ok == 1); if (ok) { #ifdef ENHANCED_XELM /* we have to check the perdicates up to the root node */ @@ -450,7 +453,6 @@ data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n) xfree(pexpr); if (xpe) { - yaz_log(YLOG_DEBUG, "Got it"); return xpe->termlists; } else { return NULL; @@ -476,25 +478,25 @@ data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n) */ /* add xpath index for an attribute */ -static void index_xpath_attr (char *tag_path, char *name, char *value, +static void index_xpath_attr(char *tag_path, char *name, char *value, char *structure, struct recExtractCtrl *p, RecWord *wrd) { wrd->index_name = ZEBRA_XPATH_ELM_BEGIN; - wrd->index_type = '0'; + wrd->index_type = "0"; wrd->term_buf = tag_path; wrd->term_len = strlen(tag_path); (*p->tokenAdd)(wrd); if (value) { wrd->index_name = ZEBRA_XPATH_ATTR_CDATA; - wrd->index_type = 'w'; + wrd->index_type = "w"; wrd->term_buf = value; wrd->term_len = strlen(value); (*p->tokenAdd)(wrd); } wrd->index_name = ZEBRA_XPATH_ELM_END; - wrd->index_type = '0'; + wrd->index_type = "0"; wrd->term_buf = tag_path; wrd->term_len = strlen(tag_path); (*p->tokenAdd)(wrd); @@ -514,7 +516,7 @@ static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n) size_t tlen = strlen(nn->u.tag.tag); if (tlen + flen > (max - 2)) break; - memcpy (tag_path_full + flen, nn->u.tag.tag, tlen); + memcpy(tag_path_full + flen, nn->u.tag.tag, tlen); flen += tlen; tag_path_full[flen++] = '/'; } @@ -563,11 +565,11 @@ static void index_xpath(struct source_parser *sp, data1_node *n, { /* need to copy recword because it may be changed */ RecWord wrd_tl; - wrd->index_type = *tl->structure; - memcpy (&wrd_tl, wrd, sizeof(*wrd)); + wrd->index_type = tl->structure; + memcpy(&wrd_tl, wrd, sizeof(*wrd)); if (tl->source) sp_parse(sp, n, &wrd_tl, tl->source); - + /* this is just the old fashioned attribute based index */ wrd_tl.index_name = tl->index_name; if (p->flagShowRecords) @@ -576,13 +578,13 @@ static void index_xpath(struct source_parser *sp, data1_node *n, printf("%*sIdx: [%s]", (level + 1) * 4, "", tl->structure); printf("%s %s", tl->index_name, tl->source); - printf (" XData:\""); + printf(" XData:\""); for (i = 0; i 40) - printf (" ..."); - fputc ('\n', stdout); + printf(" ..."); + fputc('\n', stdout); } else { @@ -600,14 +602,14 @@ static void index_xpath(struct source_parser *sp, data1_node *n, if (!p->flagShowRecords && !termlist_only) { wrd->index_name = xpath_index; - wrd->index_type = 'w'; + wrd->index_type = "w"; (*p->tokenAdd)(wrd); } break; case DATA1N_tag: mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n); - wrd->index_type = '0'; + wrd->index_type = "0"; wrd->term_buf = tag_path_full; wrd->term_len = strlen(tag_path_full); wrd->index_name = xpath_index; @@ -615,9 +617,9 @@ static void index_xpath(struct source_parser *sp, data1_node *n, { printf("%*s tag=", (level + 1) * 4, ""); for (i = 0; iterm_len && i < 40; i++) - fputc (wrd->term_buf[i], stdout); + fputc(wrd->term_buf[i], stdout); if (i == 40) - printf (" .."); + printf(" .."); printf("\n"); } else @@ -638,7 +640,7 @@ static void index_xpath(struct source_parser *sp, data1_node *n, char attr_tag_path_full[1024]; /* this could be cached as well */ - sprintf (attr_tag_path_full, "@%s/%s", + sprintf(attr_tag_path_full, "@%s/%s", xp->name, tag_path_full); tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n); @@ -646,7 +648,7 @@ static void index_xpath(struct source_parser *sp, data1_node *n, if (!termlist_only) { /* attribute (no value) */ - wrd->index_type = '0'; + wrd->index_type = "0"; wrd->index_name = ZEBRA_XPATH_ATTR_NAME; wrd->term_buf = xp->name; wrd->term_len = strlen(xp->name); @@ -659,12 +661,12 @@ static void index_xpath(struct source_parser *sp, data1_node *n, strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) { /* attribute value exact */ - strcpy (comb, xp->name); - strcat (comb, "="); - strcat (comb, xp->value); + strcpy(comb, xp->name); + strcat(comb, "="); + strcat(comb, xp->value); wrd->index_name = ZEBRA_XPATH_ATTR_NAME; - wrd->index_type = '0'; + wrd->index_type = "0"; wrd->term_buf = comb; wrd->term_len = strlen(comb); wrd->seqno--; @@ -681,7 +683,7 @@ static void index_xpath(struct source_parser *sp, data1_node *n, char attr_tag_path_full[1024]; int xpdone = 0; - sprintf (attr_tag_path_full, "@%s/%s", + sprintf(attr_tag_path_full, "@%s/%s", xp->name, tag_path_full); if ((tl = tll[i])) { @@ -691,7 +693,7 @@ static void index_xpath(struct source_parser *sp, data1_node *n, if (!tl->index_name) { /* add xpath index for the attribute */ - index_xpath_attr (attr_tag_path_full, xp->name, + index_xpath_attr(attr_tag_path_full, xp->name, xp->value, tl->structure, p, wrd); xpdone = 1; @@ -700,7 +702,7 @@ static void index_xpath(struct source_parser *sp, data1_node *n, if (xp->value) { wrd->index_name = tl->index_name; - wrd->index_type = *tl->structure; + wrd->index_type = tl->structure; wrd->term_buf = xp->value; wrd->term_len = strlen(xp->value); (*p->tokenAdd)(wrd); @@ -713,7 +715,7 @@ static void index_xpath(struct source_parser *sp, data1_node *n, the attribute as "w" */ if (!xpdone && !termlist_only) { - index_xpath_attr (attr_tag_path_full, xp->name, + index_xpath_attr(attr_tag_path_full, xp->name, xp->value, "w", p, wrd); } i++; @@ -723,7 +725,7 @@ static void index_xpath(struct source_parser *sp, data1_node *n, } } -static void index_termlist (struct source_parser *sp, data1_node *par, +static void index_termlist(struct source_parser *sp, data1_node *par, data1_node *n, struct recExtractCtrl *p, int level, RecWord *wrd) { @@ -759,17 +761,17 @@ static void index_termlist (struct source_parser *sp, data1_node *par, printf("%*sIdx: [%s]", (level + 1) * 4, "", tlist->structure); printf("%s %s", tlist->index_name, tlist->source); - printf (" XData:\""); + printf(" XData:\""); for (i = 0; iterm_len && i < 40; i++) - fputc (wrd->term_buf[i], stdout); - fputc ('"', stdout); + fputc(wrd->term_buf[i], stdout); + fputc('"', stdout); if (wrd->term_len > 40) - printf (" ..."); - fputc ('\n', stdout); + printf(" ..."); + fputc('\n', stdout); } else { - wrd->index_type = *tlist->structure; + wrd->index_type = tlist->structure; wrd->index_name = tlist->index_name; (*p->tokenAdd)(wrd); } @@ -915,13 +917,13 @@ static int grs_extract_sub(void *clientData, struct recExtractCtrl *p, data1_concat_text(p->dh, mem, n); /* ensure our data1 tree is UTF-8 */ - data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n)); + data1_iconv(p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n)); - data1_remove_idzebra_subtree (p->dh, n); + data1_remove_idzebra_subtree(p->dh, n); #if 0 - data1_pr_tree (p->dh, n, stdout); + data1_pr_tree(p->dh, n, stdout); #endif (*p->init)(p, &wrd); @@ -936,7 +938,7 @@ int zebra_grs_extract(void *clientData, struct recExtractCtrl *p, data1_node *(*grs_read)(struct grs_read_info *)) { int ret; - NMEM mem = nmem_create (); + NMEM mem = nmem_create(); ret = grs_extract_sub(clientData, p, mem, grs_read); nmem_destroy(mem); return ret; @@ -1031,7 +1033,7 @@ static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c, */ -static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top, +static void zebra_xml_metadata(struct recRetrieveCtrl *p, data1_node *top, NMEM mem) { const char *idzebra_ns[3]; @@ -1043,29 +1045,29 @@ static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top, idzebra_ns[1] = "http://www.indexdata.dk/zebra/"; idzebra_ns[2] = 0; - data1_mk_text (p->dh, mem, i2, top); + data1_mk_text(p->dh, mem, i2, top); - n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top); + n = data1_mk_tag(p->dh, mem, "idzebra", idzebra_ns, top); - data1_mk_text (p->dh, mem, "\n", top); + data1_mk_text(p->dh, mem, "\n", top); - data1_mk_text (p->dh, mem, i4, n); + data1_mk_text(p->dh, mem, i4, n); - data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem); + data1_mk_tag_data_int(p->dh, n, "size", p->recordSize, mem); if (p->score != -1) { - data1_mk_text (p->dh, mem, i4, n); - data1_mk_tag_data_int (p->dh, n, "score", p->score, mem); + data1_mk_text(p->dh, mem, i4, n); + data1_mk_tag_data_int(p->dh, n, "score", p->score, mem); } - data1_mk_text (p->dh, mem, i4, n); - data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem); + data1_mk_text(p->dh, mem, i4, n); + data1_mk_tag_data_zint(p->dh, n, "localnumber", p->localno, mem); if (p->fname) { - data1_mk_text (p->dh, mem, i4, n); + data1_mk_text(p->dh, mem, i4, n); data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem); } - data1_mk_text (p->dh, mem, i2, n); + data1_mk_text(p->dh, mem, i2, n); } int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, @@ -1079,7 +1081,7 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, struct grs_read_info gri; const char *tagname; - const int *requested_schema = 0; + const Odr_oid *requested_schema = 0; data1_marctab *marctab; int dummy; @@ -1094,17 +1096,17 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, if (!node) { p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; - nmem_destroy (mem); + nmem_destroy(mem); return 0; } data1_concat_text(p->dh, mem, node); - data1_remove_idzebra_subtree (p->dh, node); + data1_remove_idzebra_subtree(p->dh, node); #if 0 - data1_pr_tree (p->dh, node, stdout); + data1_pr_tree(p->dh, node, stdout); #endif - top = data1_get_root_tag (p->dh, node); + top = data1_get_root_tag(p->dh, node); yaz_log(YLOG_DEBUG, "grs_retrieve: size"); tagname = data1_systag_lookup(node->u.root.absyn, "size", "size"); @@ -1148,10 +1150,10 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, assert(p->input_format); if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_xml)) - zebra_xml_metadata (p, top, mem); + zebra_xml_metadata(p, top, mem); #if 0 - data1_pr_tree (p->dh, node, stdout); + data1_pr_tree(p->dh, node, stdout); #endif if (p->comp && p->comp->which == Z_RecordComp_complex && p->comp->u.complex->generic && @@ -1168,14 +1170,13 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping"); for (map = node->u.root.absyn->maptabs; map; map = map->next) { - // if (map->target_absyn_ref == requested_schema) if (!oid_oidcmp(map->oid, requested_schema)) { onode = node; if (!(node = data1_map_record(p->dh, onode, map, mem))) { p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; - nmem_destroy (mem); + nmem_destroy(mem); return 0; } break; @@ -1185,7 +1186,7 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, && oid_oidcmp(requested_schema, node->u.root.absyn->oid)) { p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX; - nmem_destroy (mem); + nmem_destroy(mem); return 0; } } @@ -1204,7 +1205,7 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, if (!(node = data1_map_record(p->dh, onode, map, mem))) { p->diagnostic = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; - nmem_destroy (mem); + nmem_destroy(mem); return 0; } break; @@ -1238,7 +1239,7 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, selected = 1; #if 0 - data1_pr_tree (p->dh, node, stdout); + data1_pr_tree(p->dh, node, stdout); #endif yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping"); @@ -1248,10 +1249,10 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_xml)) { #if 0 - data1_pr_tree (p->dh, node, stdout); + data1_pr_tree(p->dh, node, stdout); #endif /* default output encoding for XML is UTF-8 */ - data1_iconv (p->dh, mem, node, + data1_iconv(p->dh, mem, node, p->encoding ? p->encoding : "UTF-8", data1_get_encoding(p->dh, node)); @@ -1260,14 +1261,14 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX; else { - char *new_buf = (char*) odr_malloc (p->odr, p->rec_len); - memcpy (new_buf, p->rec_buf, p->rec_len); + char *new_buf = (char*) odr_malloc(p->odr, p->rec_len); + memcpy(new_buf, p->rec_buf, p->rec_len); p->rec_buf = new_buf; } } else if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_grs_1)) { - data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node)); + data1_iconv(p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node)); dummy = 0; if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected, p->odr, &dummy))) @@ -1278,7 +1279,7 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, else if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_explain)) { /* ensure our data1 tree is UTF-8 */ - data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node)); + data1_iconv(p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node)); if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected, p->odr))) @@ -1289,7 +1290,7 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, else if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_summary)) { /* ensure our data1 tree is UTF-8 */ - data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node)); + data1_iconv(p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node)); if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected, p->odr))) p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX; @@ -1299,30 +1300,30 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, else if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_sutrs)) { if (p->encoding) - data1_iconv (p->dh, mem, node, p->encoding, + data1_iconv(p->dh, mem, node, p->encoding, data1_get_encoding(p->dh, node)); if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected, &p->rec_len))) p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX; else { - char *new_buf = (char*) odr_malloc (p->odr, p->rec_len); - memcpy (new_buf, p->rec_buf, p->rec_len); + char *new_buf = (char*) odr_malloc(p->odr, p->rec_len); + memcpy(new_buf, p->rec_buf, p->rec_len); p->rec_buf = new_buf; } } else if (!oid_oidcmp(p->input_format, yaz_oid_recsyn_soif)) { if (p->encoding) - data1_iconv (p->dh, mem, node, p->encoding, + data1_iconv(p->dh, mem, node, p->encoding, data1_get_encoding(p->dh, node)); if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected, &p->rec_len))) p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX; else { - char *new_buf = (char*) odr_malloc (p->odr, p->rec_len); - memcpy (new_buf, p->rec_buf, p->rec_len); + char *new_buf = (char*) odr_malloc(p->odr, p->rec_len); + memcpy(new_buf, p->rec_buf, p->rec_len); p->rec_buf = new_buf; } } @@ -1341,15 +1342,15 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, else { if (p->encoding) - data1_iconv (p->dh, mem, node, p->encoding, + data1_iconv(p->dh, mem, node, p->encoding, data1_get_encoding(p->dh, node)); if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node, selected, &p->rec_len))) p->diagnostic = YAZ_BIB1_RECORD_NOT_AVAILABLE_IN_REQUESTED_SYNTAX; else { - char *new_buf = (char*) odr_malloc (p->odr, p->rec_len); - memcpy (new_buf, p->rec_buf, p->rec_len); + char *new_buf = (char*) odr_malloc(p->odr, p->rec_len); + memcpy(new_buf, p->rec_buf, p->rec_len); p->rec_buf = new_buf; } } @@ -1362,6 +1363,7 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p, /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab