From 24a717fb09063be70ff56dc82e9f03e7673254e0 Mon Sep 17 00:00:00 2001 From: Dennis Schafroth Date: Wed, 11 Aug 2010 16:33:30 +0200 Subject: [PATCH] Add SOLR (rpn2solr) support Not 100% working, only basic stuff. TODO: also clean out stuff not needed (based on the CQF files) --- src/rpn2solr.c | 336 +++++++++++++++++ src/solrtransform.c | 995 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1331 insertions(+) create mode 100644 src/rpn2solr.c create mode 100644 src/solrtransform.c diff --git a/src/rpn2solr.c b/src/rpn2solr.c new file mode 100644 index 0000000..de485ab --- /dev/null +++ b/src/rpn2solr.c @@ -0,0 +1,336 @@ +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2010 Index Data + * See the file LICENSE for details. + */ + +/** + * \file + * \brief Implements RPN to SOLR conversion + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static void wrbuf_vputs(const char *buf, void *client_data) +{ + wrbuf_write((WRBUF) client_data, buf, strlen(buf)); +} + +static const char *lookup_index_from_string_attr(Z_AttributeList *attributes) +{ + int j; + int server_choice = 1; + for (j = 0; j < attributes->num_attributes; j++) + { + Z_AttributeElement *ae = attributes->attributes[j]; + if (*ae->attributeType == 1) /* use attribute */ + { + if (ae->which == Z_AttributeValue_complex) + { + Z_ComplexAttribute *ca = ae->value.complex; + int i; + for (i = 0; i < ca->num_list; i++) + { + Z_StringOrNumeric *son = ca->list[i]; + if (son->which == Z_StringOrNumeric_string) + return son->u.string; + } + } + server_choice = 0; /* not serverChoice because we have use attr */ + } + } + if (server_choice) + return "solr.serverChoice"; + return 0; +} + +static const char *lookup_relation_index_from_attr(Z_AttributeList *attributes) +{ + int j; + for (j = 0; j < attributes->num_attributes; j++) + { + Z_AttributeElement *ae = attributes->attributes[j]; + if (*ae->attributeType == 2) /* relation attribute */ + { + if (ae->which == Z_AttributeValue_numeric) + { + /* Only support for numeric relation */ + Odr_int *relation = ae->value.numeric; + /* map this numeric to representation in SOLR */ + switch (*relation) + { + /* Unsure on whether this is the relation attribute constants? */ + case Z_ProximityOperator_Prox_lessThan: + return 0; + case Z_ProximityOperator_Prox_lessThanOrEqual: + return 0; + case Z_ProximityOperator_Prox_equal: + return ":"; + case Z_ProximityOperator_Prox_greaterThanOrEqual: + return 0; + case Z_ProximityOperator_Prox_greaterThan: + return 0; + case Z_ProximityOperator_Prox_notEqual: + return 0; + case 100: + /* phonetic is not implemented*/ + return 0; + case 101: + /* stem is not not implemented */ + return 0; + case 102: + /* relevance is supported in SOLR, but not implemented yet */ + return 0; + default: + /* Invalid relation */ + return 0; + } + } + else { + /* Can we have a complex relation value? + Should we implement something? + */ + } + } + } + return ":"; +} + +static int rpn2solr_attr(solr_transform_t ct, + Z_AttributeList *attributes, WRBUF w) +{ + const char *relation = solr_lookup_reverse(ct, "relation.", attributes); + const char *index = solr_lookup_reverse(ct, "index.", attributes); + const char *structure = solr_lookup_reverse(ct, "structure.", attributes); + + /* if transform (properties) do not match, we'll just use a USE string attribute (bug #2978) */ + if (!index) + index = lookup_index_from_string_attr(attributes); + + /* Attempt to fix bug #2978: Look for a relation attribute */ + if (!relation) + relation = lookup_relation_index_from_attr(attributes); + + if (!index) + { + solr_transform_set_error(ct, + YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 0); + return -1; + } + /* for serverChoice we omit index+relation+structure */ + if (strcmp(index, "solr.serverChoice")) + { + wrbuf_puts(w, index); + if (relation) + { + if (!strcmp(relation, "exact")) + /* TODO Verify if a exact SOLR exists */ + relation = ":"; + else if (!strcmp(relation, "eq")) + relation = ":"; + else if (!strcmp(relation, "le")) { + /* TODO Not support as such, but could perhaps be transformed into a range + relation = ":[ * to "; + close_range = "]" + */ + } + else if (!strcmp(relation, "ge")) { + /* TODO Not support as such, but could perhaps be transformed into a range + relation = "["; + relation = ":[ * to "; + close_range = "]" + */ + } + /* Missing mapping of not equal, phonetic, stem and relevance */ + wrbuf_puts(w, relation); + } + else + wrbuf_puts(w, ":"); + + if (structure) + { + if (strcmp(structure, "*")) + { + wrbuf_puts(w, "/"); + wrbuf_puts(w, structure); + wrbuf_puts(w, " "); + } + } + } + return 0; +} + +/* Bug 2878: Currently only support left and right truncation. Specific check for this */ +static int checkForTruncation(int flag, Z_AttributeList *attributes) +{ + int j; + for (j = 0; j < attributes->num_attributes; j++) + { + Z_AttributeElement *ae = attributes->attributes[j]; + if (*ae->attributeType == 5) /* truncation attribute */ + { + if (ae->which == Z_AttributeValue_numeric) + { + Odr_int truncation = *(ae->value.numeric); + /* This logic only works for Left, right and both. eg. 1,2,3 */ + if (truncation <= 3) + return ((int) truncation & flag); + } + /* Complex: Shouldn't happen */ + } + } + /* No truncation or unsupported */ + return 0; +}; + +static int checkForLeftTruncation(Z_AttributeList *attributes) { + return checkForTruncation(1, attributes); +} + +static int checkForRightTruncation(Z_AttributeList *attributes) { + return checkForTruncation(2, attributes); +}; + +static int rpn2solr_simple(solr_transform_t ct, + void (*pr)(const char *buf, void *client_data), + void *client_data, + Z_Operand *q, WRBUF w) +{ + int ret = 0; + if (q->which != Z_Operand_APT) + { + ret = -1; + solr_transform_set_error(ct, YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM, 0); + } + else + { + Z_AttributesPlusTerm *apt = q->u.attributesPlusTerm; + Z_Term *term = apt->term; + const char *sterm = 0; + size_t lterm = 0; + + wrbuf_rewind(w); + ret = rpn2solr_attr(ct, apt->attributes, w); + + switch(term->which) + { + case Z_Term_general: + lterm = term->u.general->len; + sterm = (const char *) term->u.general->buf; + break; + case Z_Term_numeric: + wrbuf_printf(w, ODR_INT_PRINTF, *term->u.numeric); + break; + case Z_Term_characterString: + sterm = term->u.characterString; + lterm = strlen(sterm); + break; + default: + ret = -1; + solr_transform_set_error(ct, YAZ_BIB1_TERM_TYPE_UNSUPP, 0); + } + + if (term) + { + size_t i; + int must_quote = 0; + for (i = 0 ; i < lterm; i++) + if (sterm[i] == ' ') + must_quote = 1; + if (must_quote) + wrbuf_puts(w, "\""); + /* Bug 2878: Check and add Truncation */ + if (checkForLeftTruncation(apt->attributes)) + wrbuf_puts(w, "*"); + wrbuf_write(w, sterm, lterm); + /* Bug 2878: Check and add Truncation */ + if (checkForRightTruncation(apt->attributes)) + wrbuf_puts(w, "*"); + if (must_quote) + wrbuf_puts(w, "\""); + } + if (ret == 0) + pr(wrbuf_cstr(w), client_data); + } + return ret; +} + + +static int rpn2solr_structure(solr_transform_t ct, + void (*pr)(const char *buf, void *client_data), + void *client_data, + Z_RPNStructure *q, int nested, + WRBUF w) +{ + if (q->which == Z_RPNStructure_simple) + return rpn2solr_simple(ct, pr, client_data, q->u.simple, w); + else + { + Z_Operator *op = q->u.complex->roperator; + int r; + + if (nested) + pr("(", client_data); + + r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s1, 1, w); + if (r) + return r; + switch(op->which) + { + case Z_Operator_and: + pr(" AND ", client_data); + break; + case Z_Operator_or: + pr(" OR ", client_data); + break; + case Z_Operator_and_not: + pr(" AND NOT ", client_data); + break; + case Z_Operator_prox: + solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_SEARCH, 0); + return -1; + } + r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s2, 1, w); + if (nested) + pr(")", client_data); + return r; + } +} + +int solr_transform_rpn2solr_stream(solr_transform_t ct, + void (*pr)(const char *buf, void *client_data), + void *client_data, + Z_RPNQuery *q) +{ + int r; + WRBUF w = wrbuf_alloc(); + solr_transform_set_error(ct, 0, 0); + r = rpn2solr_structure(ct, pr, client_data, q->RPNStructure, 0, w); + wrbuf_destroy(w); + return r; +} + + +int solr_transform_rpn2solr_wrbuf(solr_transform_t ct, + WRBUF w, + Z_RPNQuery *q) +{ + return solr_transform_rpn2solr_stream(ct, wrbuf_vputs, w, q); +} + +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/src/solrtransform.c b/src/solrtransform.c new file mode 100644 index 0000000..b800ca9 --- /dev/null +++ b/src/solrtransform.c @@ -0,0 +1,995 @@ +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2010 Index Data + * See the file LICENSE for details. + */ + +/** + * \file solrtransform.c + * \brief Implements SOLR transform (SOLR to RPN conversion). + * + * Evaluation order of rules: + * + * always + * relation + * structure + * position + * truncation + * index + * relationModifier + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct solr_prop_entry { + char *pattern; + char *value; + Z_AttributeList attr_list; + struct solr_prop_entry *next; +}; + +struct solr_transform_t_ { + struct solr_prop_entry *entry; + yaz_tok_cfg_t tok_cfg; + int error; + char *addinfo; + WRBUF w; + NMEM nmem; +}; + + +/* TODO Utility functions, evt. split out int separate file */ +int solr_strcmp(const char *s1, const char *s2) { + return cql_strcmp(s1, s2); +} + +int solr_strncmp(const char *s1, const char *s2, size_t n) { + return cql_strncmp(s1, s2, n); +} + +/* TODO FIX */ +const char *solr_uri(void) +{ + return "TODO:SOLR URI"; +} + +void solr_buf_write_handler (const char *b, void *client_data) +{ + struct solr_buf_write_info *info = (struct solr_buf_write_info *)client_data; + int l = strlen(b); + if (info->off < 0 || (info->off + l >= info->max)) + { + info->off = -1; + return; + } + memcpy (info->buf + info->off, b, l); + info->off += l; +} + + +/* Utillity functions end */ + +solr_transform_t solr_transform_create(void) +{ + solr_transform_t ct = (solr_transform_t) xmalloc(sizeof(*ct)); + ct->tok_cfg = yaz_tok_cfg_create(); + ct->w = wrbuf_alloc(); + ct->error = 0; + ct->addinfo = 0; + ct->entry = 0; + ct->nmem = nmem_create(); + return ct; +} + +static int solr_transform_parse_tok_line(solr_transform_t ct, + const char *pattern, + yaz_tok_parse_t tp) +{ + int ae_num = 0; + Z_AttributeElement *ae[20]; + int ret = 0; /* 0=OK, != 0 FAIL */ + int t; + t = yaz_tok_move(tp); + + while (t == YAZ_TOK_STRING && ae_num < 20) + { + WRBUF type_str = wrbuf_alloc(); + WRBUF set_str = 0; + Z_AttributeElement *elem = 0; + const char *value_str = 0; + /* attset type=value OR type=value */ + + elem = (Z_AttributeElement *) nmem_malloc(ct->nmem, sizeof(*elem)); + elem->attributeSet = 0; + ae[ae_num] = elem; + wrbuf_puts(ct->w, yaz_tok_parse_string(tp)); + wrbuf_puts(type_str, yaz_tok_parse_string(tp)); + t = yaz_tok_move(tp); + if (t == YAZ_TOK_EOF) + { + wrbuf_destroy(type_str); + if (set_str) + wrbuf_destroy(set_str); + break; + } + if (t == YAZ_TOK_STRING) + { + wrbuf_puts(ct->w, " "); + wrbuf_puts(ct->w, yaz_tok_parse_string(tp)); + set_str = type_str; + + elem->attributeSet = + yaz_string_to_oid_nmem(yaz_oid_std(), CLASS_ATTSET, + wrbuf_cstr(set_str), ct->nmem); + + type_str = wrbuf_alloc(); + wrbuf_puts(type_str, yaz_tok_parse_string(tp)); + t = yaz_tok_move(tp); + } + elem->attributeType = nmem_intdup(ct->nmem, 0); + if (sscanf(wrbuf_cstr(type_str), ODR_INT_PRINTF, elem->attributeType) + != 1) + { + wrbuf_destroy(type_str); + if (set_str) + wrbuf_destroy(set_str); + yaz_log(YLOG_WARN, "Expected numeric attribute type"); + ret = -1; + break; + } + + wrbuf_destroy(type_str); + if (set_str) + wrbuf_destroy(set_str); + + if (t != '=') + { + yaz_log(YLOG_WARN, "Expected = after after attribute type"); + ret = -1; + break; + } + t = yaz_tok_move(tp); + if (t != YAZ_TOK_STRING) /* value */ + { + yaz_log(YLOG_WARN, "Missing attribute value"); + ret = -1; + break; + } + value_str = yaz_tok_parse_string(tp); + if (isdigit(*value_str)) + { + elem->which = Z_AttributeValue_numeric; + elem->value.numeric = + nmem_intdup(ct->nmem, atoi(value_str)); + } + else + { + Z_ComplexAttribute *ca = (Z_ComplexAttribute *) + nmem_malloc(ct->nmem, sizeof(*ca)); + elem->which = Z_AttributeValue_complex; + elem->value.complex = ca; + ca->num_list = 1; + ca->list = (Z_StringOrNumeric **) + nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric *)); + ca->list[0] = (Z_StringOrNumeric *) + nmem_malloc(ct->nmem, sizeof(Z_StringOrNumeric)); + ca->list[0]->which = Z_StringOrNumeric_string; + ca->list[0]->u.string = nmem_strdup(ct->nmem, value_str); + ca->num_semanticAction = 0; + ca->semanticAction = 0; + } + wrbuf_puts(ct->w, "="); + wrbuf_puts(ct->w, yaz_tok_parse_string(tp)); + t = yaz_tok_move(tp); + wrbuf_puts(ct->w, " "); + ae_num++; + } + if (ret == 0) /* OK? */ + { + struct solr_prop_entry **pp = &ct->entry; + while (*pp) + pp = &(*pp)->next; + *pp = (struct solr_prop_entry *) xmalloc(sizeof(**pp)); + (*pp)->pattern = xstrdup(pattern); + (*pp)->value = xstrdup(wrbuf_cstr(ct->w)); + + (*pp)->attr_list.num_attributes = ae_num; + if (ae_num == 0) + (*pp)->attr_list.attributes = 0; + else + { + (*pp)->attr_list.attributes = (Z_AttributeElement **) + nmem_malloc(ct->nmem, + ae_num * sizeof(Z_AttributeElement *)); + memcpy((*pp)->attr_list.attributes, ae, + ae_num * sizeof(Z_AttributeElement *)); + } + (*pp)->next = 0; + + if (0) + { + ODR pr = odr_createmem(ODR_PRINT); + Z_AttributeList *alp = &(*pp)->attr_list; + odr_setprint(pr, yaz_log_file()); + z_AttributeList(pr, &alp, 0, 0); + odr_setprint(pr, 0); + odr_destroy(pr); + } + } + return ret; +} + +int solr_transform_define_pattern(solr_transform_t ct, const char *pattern, + const char *value) +{ + int r; + yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, value); + yaz_tok_cfg_single_tokens(ct->tok_cfg, "="); + r = solr_transform_parse_tok_line(ct, pattern, tp); + yaz_tok_parse_destroy(tp); + return r; +} + +solr_transform_t solr_transform_open_FILE(FILE *f) +{ + solr_transform_t ct = solr_transform_create(); + char line[1024]; + + yaz_tok_cfg_single_tokens(ct->tok_cfg, "="); + + while (fgets(line, sizeof(line)-1, f)) + { + yaz_tok_parse_t tp = yaz_tok_parse_buf(ct->tok_cfg, line); + int t; + wrbuf_rewind(ct->w); + t = yaz_tok_move(tp); + if (t == YAZ_TOK_STRING) + { + char * pattern = xstrdup(yaz_tok_parse_string(tp)); + t = yaz_tok_move(tp); + if (t != '=') + { + yaz_tok_parse_destroy(tp); + solr_transform_close(ct); + return 0; + } + if (solr_transform_parse_tok_line(ct, pattern, tp)) + { + yaz_tok_parse_destroy(tp); + solr_transform_close(ct); + return 0; + } + xfree(pattern); + } + else if (t != YAZ_TOK_EOF) + { + yaz_tok_parse_destroy(tp); + solr_transform_close(ct); + return 0; + } + yaz_tok_parse_destroy(tp); + } + return ct; +} + +void solr_transform_close(solr_transform_t ct) +{ + struct solr_prop_entry *pe; + if (!ct) + return; + pe = ct->entry; + while (pe) + { + struct solr_prop_entry *pe_next = pe->next; + xfree(pe->pattern); + xfree(pe->value); + xfree(pe); + pe = pe_next; + } + xfree(ct->addinfo); + yaz_tok_cfg_destroy(ct->tok_cfg); + wrbuf_destroy(ct->w); + nmem_destroy(ct->nmem); + xfree(ct); +} + +solr_transform_t solr_transform_open_fname(const char *fname) +{ + solr_transform_t ct; + FILE *f = fopen(fname, "r"); + if (!f) + return 0; + ct = solr_transform_open_FILE(f); + fclose(f); + return ct; +} + +#if 0 +struct Z_AttributeElement { + Z_AttributeSetId *attributeSet; /* OPT */ + int *attributeType; + int which; + union { + int *numeric; + Z_ComplexAttribute *complex; +#define Z_AttributeValue_numeric 1 +#define Z_AttributeValue_complex 2 + } value; +}; +#endif + +static int compare_attr(Z_AttributeElement *a, Z_AttributeElement *b) +{ + ODR odr_a = odr_createmem(ODR_ENCODE); + ODR odr_b = odr_createmem(ODR_ENCODE); + int len_a, len_b; + char *buf_a, *buf_b; + int ret; + + z_AttributeElement(odr_a, &a, 0, 0); + z_AttributeElement(odr_b, &b, 0, 0); + + buf_a = odr_getbuf(odr_a, &len_a, 0); + buf_b = odr_getbuf(odr_b, &len_b, 0); + + ret = yaz_memcmp(buf_a, buf_b, len_a, len_b); + + odr_destroy(odr_a); + odr_destroy(odr_b); + return ret; +} + +const char *solr_lookup_reverse(solr_transform_t ct, + const char *category, + Z_AttributeList *attributes) +{ + struct solr_prop_entry *e; + size_t clen = strlen(category); + for (e = ct->entry; e; e = e->next) + { + if (!strncmp(e->pattern, category, clen)) + { + /* category matches.. See if attributes in pattern value + are all listed in actual attributes */ + int i; + for (i = 0; i < e->attr_list.num_attributes; i++) + { + /* entry attribute */ + Z_AttributeElement *e_ae = e->attr_list.attributes[i]; + int j; + for (j = 0; j < attributes->num_attributes; j++) + { + /* actual attribute */ + Z_AttributeElement *a_ae = attributes->attributes[j]; + int r = compare_attr(e_ae, a_ae); + if (r == 0) + break; + } + if (j == attributes->num_attributes) + break; /* i was not found at all.. try next pattern */ + + } + if (i == e->attr_list.num_attributes) + return e->pattern + clen; + } + } + return 0; +} + +static const char *solr_lookup_property(solr_transform_t ct, + const char *pat1, const char *pat2, + const char *pat3) +{ + char pattern[120]; + struct solr_prop_entry *e; + + if (pat1 && pat2 && pat3) + sprintf(pattern, "%.39s.%.39s.%.39s", pat1, pat2, pat3); + else if (pat1 && pat2) + sprintf(pattern, "%.39s.%.39s", pat1, pat2); + else if (pat1 && pat3) + sprintf(pattern, "%.39s.%.39s", pat1, pat3); + else if (pat1) + sprintf(pattern, "%.39s", pat1); + else + return 0; + + for (e = ct->entry; e; e = e->next) + { + if (!solr_strcmp(e->pattern, pattern)) + return e->value; + } + return 0; +} + +int solr_pr_attr_uri(solr_transform_t ct, const char *category, + const char *uri, const char *val, const char *default_val, + void (*pr)(const char *buf, void *client_data), + void *client_data, + int errcode) +{ + const char *res = 0; + const char *eval = val ? val : default_val; + const char *prefix = 0; + + if (uri) + { + struct solr_prop_entry *e; + + for (e = ct->entry; e; e = e->next) + if (!memcmp(e->pattern, "set.", 4) && e->value && + !strcmp(e->value, uri)) + { + prefix = e->pattern+4; + break; + } + /* must have a prefix now - if not it's an error */ + } + + if (!uri || prefix) + { + if (!res) + res = solr_lookup_property(ct, category, prefix, eval); + /* we have some aliases for some relations unfortunately.. */ + if (!res && !prefix && !strcmp(category, "relation")) + { + if (!strcmp(val, "==")) + res = solr_lookup_property(ct, category, prefix, "exact"); + if (!strcmp(val, "=")) + res = solr_lookup_property(ct, category, prefix, "eq"); + if (!strcmp(val, "<=")) + res = solr_lookup_property(ct, category, prefix, "le"); + if (!strcmp(val, ">=")) + res = solr_lookup_property(ct, category, prefix, "ge"); + } + if (!res) + res = solr_lookup_property(ct, category, prefix, "*"); + } + if (res) + { + char buf[64]; + + const char *cp0 = res, *cp1; + while ((cp1 = strchr(cp0, '='))) + { + int i; + while (*cp1 && *cp1 != ' ') + cp1++; + if (cp1 - cp0 >= (ptrdiff_t) sizeof(buf)) + break; + memcpy(buf, cp0, cp1 - cp0); + buf[cp1-cp0] = 0; + (*pr)("@attr ", client_data); + + for (i = 0; buf[i]; i++) + { + if (buf[i] == '*') + (*pr)(eval, client_data); + else + { + char tmp[2]; + tmp[0] = buf[i]; + tmp[1] = '\0'; + (*pr)(tmp, client_data); + } + } + (*pr)(" ", client_data); + cp0 = cp1; + while (*cp0 == ' ') + cp0++; + } + return 1; + } + /* error ... */ + if (errcode && !ct->error) + { + ct->error = errcode; + if (val) + ct->addinfo = xstrdup(val); + else + ct->addinfo = 0; + } + return 0; +} + +int solr_pr_attr(solr_transform_t ct, const char *category, + const char *val, const char *default_val, + void (*pr)(const char *buf, void *client_data), + void *client_data, + int errcode) +{ + return solr_pr_attr_uri(ct, category, 0 /* uri */, + val, default_val, pr, client_data, errcode); +} + + +static void solr_pr_int(int val, + void (*pr)(const char *buf, void *client_data), + void *client_data) +{ + char buf[21]; /* enough characters to 2^64 */ + sprintf(buf, "%d", val); + (*pr)(buf, client_data); + (*pr)(" ", client_data); +} + + +static int solr_pr_prox(solr_transform_t ct, struct solr_node *mods, + void (*pr)(const char *buf, void *client_data), + void *client_data) +{ + int exclusion = 0; + int distance; /* to be filled in later depending on unit */ + int distance_defined = 0; + int ordered = 0; + int proxrel = 2; /* less than or equal */ + int unit = 2; /* word */ + + while (mods) + { + const char *name = mods->u.st.index; + const char *term = mods->u.st.term; + const char *relation = mods->u.st.relation; + + if (!strcmp(name, "distance")) { + distance = strtol(term, (char**) 0, 0); + distance_defined = 1; + if (!strcmp(relation, "=")) + proxrel = 3; + else if (!strcmp(relation, ">")) + proxrel = 5; + else if (!strcmp(relation, "<")) + proxrel = 1; + else if (!strcmp(relation, ">=")) + proxrel = 4; + else if (!strcmp(relation, "<=")) + proxrel = 2; + else if (!strcmp(relation, "<>")) + proxrel = 6; + else + { + ct->error = YAZ_SRW_UNSUPP_PROX_RELATION; + ct->addinfo = xstrdup(relation); + return 0; + } + } + else if (!strcmp(name, "ordered")) + ordered = 1; + else if (!strcmp(name, "unordered")) + ordered = 0; + else if (!strcmp(name, "unit")) + { + if (!strcmp(term, "word")) + unit = 2; + else if (!strcmp(term, "sentence")) + unit = 3; + else if (!strcmp(term, "paragraph")) + unit = 4; + else if (!strcmp(term, "element")) + unit = 8; + else + { + ct->error = YAZ_SRW_UNSUPP_PROX_UNIT; + ct->addinfo = xstrdup(term); + return 0; + } + } + else + { + ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER; + ct->addinfo = xstrdup(name); + return 0; + } + mods = mods->u.st.modifiers; + } + + if (!distance_defined) + distance = (unit == 2) ? 1 : 0; + + solr_pr_int(exclusion, pr, client_data); + solr_pr_int(distance, pr, client_data); + solr_pr_int(ordered, pr, client_data); + solr_pr_int(proxrel, pr, client_data); + (*pr)("k ", client_data); + solr_pr_int(unit, pr, client_data); + + return 1; +} + +/* Returns location of first wildcard character in the `length' + * characters starting at `term', or a null pointer of there are + * none -- like memchr(). + */ +static const char *wcchar(int start, const char *term, int length) +{ + while (length > 0) + { + if (start || term[-1] != '\\') + if (strchr("*?", *term)) + return term; + term++; + length--; + start = 0; + } + return 0; +} + + +/* ### checks for SOLR relation-name rather than Type-1 attribute */ +static int has_modifier(struct solr_node *cn, const char *name) { + struct solr_node *mod; + for (mod = cn->u.st.modifiers; mod != 0; mod = mod->u.st.modifiers) { + if (!strcmp(mod->u.st.index, name)) + return 1; + } + + return 0; +} + + +void emit_term(solr_transform_t ct, + struct solr_node *cn, + const char *term, int length, + void (*pr)(const char *buf, void *client_data), + void *client_data) +{ + int i; + const char *ns = cn->u.st.index_uri; + int process_term = !has_modifier(cn, "regexp"); + char *z3958_mem = 0; + + assert(cn->which == SOLR_NODE_ST); + + if (process_term && length > 0) + { + if (length > 1 && term[0] == '^' && term[length-1] == '^') + { + solr_pr_attr(ct, "position", "firstAndLast", 0, + pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); + term++; + length -= 2; + } + else if (term[0] == '^') + { + solr_pr_attr(ct, "position", "first", 0, + pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); + term++; + length--; + } + else if (term[length-1] == '^') + { + solr_pr_attr(ct, "position", "last", 0, + pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); + length--; + } + else + { + solr_pr_attr(ct, "position", "any", 0, + pr, client_data, YAZ_SRW_ANCHORING_CHAR_IN_UNSUPP_POSITION); + } + } + + if (process_term && length > 0) + { + const char *first_wc = wcchar(1, term, length); + const char *second_wc = first_wc ? + wcchar(0, first_wc+1, length-(first_wc-term)-1) : 0; + + /* Check for well-known globbing patterns that represent + * simple truncation attributes as expected by, for example, + * Bath-compliant server. If we find such a pattern but + * there's no mapping for it, that's fine: we just use a + * general pattern-matching attribute. + */ + if (first_wc == term && second_wc == term + length-1 + && *first_wc == '*' && *second_wc == '*' + && solr_pr_attr(ct, "truncation", "both", 0, pr, client_data, 0)) + { + term++; + length -= 2; + } + else if (first_wc == term && second_wc == 0 && *first_wc == '*' + && solr_pr_attr(ct, "truncation", "left", 0, + pr, client_data, 0)) + { + term++; + length--; + } + else if (first_wc == term + length-1 && second_wc == 0 + && *first_wc == '*' + && solr_pr_attr(ct, "truncation", "right", 0, + pr, client_data, 0)) + { + length--; + } + else if (first_wc) + { + /* We have one or more wildcard characters, but not in a + * way that can be dealt with using only the standard + * left-, right- and both-truncation attributes. We need + * to translate the pattern into a Z39.58-type pattern, + * which has been supported in BIB-1 since 1996. If + * there's no configuration element for "truncation.z3958" + * we indicate this as error 28 "Masking character not + * supported". + */ + int i; + solr_pr_attr(ct, "truncation", "z3958", 0, + pr, client_data, YAZ_SRW_MASKING_CHAR_UNSUPP); + z3958_mem = (char *) xmalloc(length+1); + for (i = 0; i < length; i++) + { + if (i > 0 && term[i-1] == '\\') + z3958_mem[i] = term[i]; + else if (term[i] == '*') + z3958_mem[i] = '?'; + else if (term[i] == '?') + z3958_mem[i] = '#'; + else + z3958_mem[i] = term[i]; + } + z3958_mem[length] = '\0'; + term = z3958_mem; + } + else { + /* No masking characters. Use "truncation.none" if given. */ + solr_pr_attr(ct, "truncation", "none", 0, + pr, client_data, 0); + } + } + if (ns) { + solr_pr_attr_uri(ct, "index", ns, + cn->u.st.index, "serverChoice", + pr, client_data, YAZ_SRW_UNSUPP_INDEX); + } + if (cn->u.st.modifiers) + { + struct solr_node *mod = cn->u.st.modifiers; + for (; mod; mod = mod->u.st.modifiers) + { + solr_pr_attr(ct, "relationModifier", mod->u.st.index, 0, + pr, client_data, YAZ_SRW_UNSUPP_RELATION_MODIFIER); + } + } + + (*pr)("\"", client_data); + for (i = 0; iu.st.extra_terms; + if (ne) + { + (*pr)("@", client_data); + (*pr)(op, client_data); + (*pr)(" ", client_data); + } + emit_term(ct, cn, cn->u.st.term, strlen(cn->u.st.term), + pr, client_data); + for (; ne; ne = ne->u.st.extra_terms) + { + if (ne->u.st.extra_terms) + { + (*pr)("@", client_data); + (*pr)(op, client_data); + (*pr)(" ", client_data); + } + emit_term(ct, cn, ne->u.st.term, strlen(ne->u.st.term), + pr, client_data); + } +} + +void emit_wordlist(solr_transform_t ct, + struct solr_node *cn, + void (*pr)(const char *buf, void *client_data), + void *client_data, + const char *op) +{ + const char *cp0 = cn->u.st.term; + const char *cp1; + const char *last_term = 0; + int last_length = 0; + while(cp0) + { + while (*cp0 == ' ') + cp0++; + cp1 = strchr(cp0, ' '); + if (last_term) + { + (*pr)("@", client_data); + (*pr)(op, client_data); + (*pr)(" ", client_data); + emit_term(ct, cn, last_term, last_length, pr, client_data); + } + last_term = cp0; + if (cp1) + last_length = cp1 - cp0; + else + last_length = strlen(cp0); + cp0 = cp1; + } + if (last_term) + emit_term(ct, cn, last_term, last_length, pr, client_data); +} + +void solr_transform_r(solr_transform_t ct, + struct solr_node *cn, + void (*pr)(const char *buf, void *client_data), + void *client_data) +{ + const char *ns; + struct solr_node *mods; + + if (!cn) + return; + switch (cn->which) + { + case SOLR_NODE_ST: + ns = cn->u.st.index_uri; + if (ns) + { + /* TODO If relevant fix with solr_uri */ + if (!strcmp(ns, solr_uri()) + && cn->u.st.index && !solr_strcmp(cn->u.st.index, "resultSet")) + { + (*pr)("@set \"", client_data); + (*pr)(cn->u.st.term, client_data); + (*pr)("\" ", client_data); + return ; + } + } + else + { + if (!ct->error) + { + ct->error = YAZ_SRW_UNSUPP_CONTEXT_SET; + ct->addinfo = 0; + } + } + solr_pr_attr(ct, "always", 0, 0, pr, client_data, 0); + solr_pr_attr(ct, "relation", cn->u.st.relation, 0, pr, client_data, + YAZ_SRW_UNSUPP_RELATION); + solr_pr_attr(ct, "structure", cn->u.st.relation, 0, + pr, client_data, YAZ_SRW_UNSUPP_COMBI_OF_RELATION_AND_TERM); + if (cn->u.st.relation && !solr_strcmp(cn->u.st.relation, "all")) + emit_wordlist(ct, cn, pr, client_data, "and"); + else if (cn->u.st.relation && !solr_strcmp(cn->u.st.relation, "any")) + emit_wordlist(ct, cn, pr, client_data, "or"); + else + emit_terms(ct, cn, pr, client_data, "and"); + break; + case SOLR_NODE_BOOL: + (*pr)("@", client_data); + (*pr)(cn->u.boolean.value, client_data); + (*pr)(" ", client_data); + mods = cn->u.boolean.modifiers; + if (!strcmp(cn->u.boolean.value, "prox")) + { + if (!solr_pr_prox(ct, mods, pr, client_data)) + return; + } + else if (mods) + { + /* Boolean modifiers other than on proximity not supported */ + ct->error = YAZ_SRW_UNSUPP_BOOLEAN_MODIFIER; + ct->addinfo = xstrdup(mods->u.st.index); + return; + } + + solr_transform_r(ct, cn->u.boolean.left, pr, client_data); + solr_transform_r(ct, cn->u.boolean.right, pr, client_data); + break; + + default: + fprintf(stderr, "Fatal: impossible SOLR node-type %d\n", cn->which); + abort(); + } +} + +int solr_transform(solr_transform_t ct, struct solr_node *cn, + void (*pr)(const char *buf, void *client_data), + void *client_data) +{ + struct solr_prop_entry *e; + NMEM nmem = nmem_create(); + + ct->error = 0; + xfree(ct->addinfo); + ct->addinfo = 0; + + for (e = ct->entry; e ; e = e->next) + { + /* TODO remove as SOLR dont supports sets. + if (!solr_strncmp(e->pattern, "set.", 4)) + solr_apply_prefix(nmem, cn, e->pattern+4, e->value); + else if (!solr_strcmp(e->pattern, "set")) + solr_apply_prefix(nmem, cn, 0, e->value); + */ + } + solr_transform_r(ct, cn, pr, client_data); + nmem_destroy(nmem); + return ct->error; +} + + +int solr_transform_FILE(solr_transform_t ct, struct solr_node *cn, FILE *f) +{ + /* We can use the cql_fputs util */ + return solr_transform(ct, cn, cql_fputs, f); +} + +int solr_transform_buf(solr_transform_t ct, struct solr_node *cn, char *out, int max) +{ + struct solr_buf_write_info info; + int r; + + info.off = 0; + info.max = max; + info.buf = out; + r = solr_transform(ct, cn, cql_buf_write_handler, &info); + if (info.off < 0) { + /* Attempt to write past end of buffer. For some reason, this + SRW diagnostic is deprecated, but it's so perfect for our + purposes that it would be stupid not to use it. */ + char numbuf[30]; + ct->error = YAZ_SRW_TOO_MANY_CHARS_IN_QUERY; + sprintf(numbuf, "%ld", (long) info.max); + ct->addinfo = xstrdup(numbuf); + return -1; + } + if (info.off >= 0) + info.buf[info.off] = '\0'; + return r; +} + +int solr_transform_error(solr_transform_t ct, const char **addinfo) +{ + *addinfo = ct->addinfo; + return ct->error; +} + +void solr_transform_set_error(solr_transform_t ct, int error, const char *addinfo) +{ + xfree(ct->addinfo); + ct->addinfo = addinfo ? xstrdup(addinfo) : 0; + ct->error = error; +} + +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + -- 1.7.10.4