X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Frpn2solr.c;h=c9143c7d148ef22dc17b9e40ea10901f41db2a31;hp=076e3a9f83043309f38862573e2234e13812cdde;hb=406ec3b354a9260bf09cc80c540d3d19845dc915;hpb=744a00b201a04b308f22fad1ece422b97d2cdd50 diff --git a/src/rpn2solr.c b/src/rpn2solr.c index 076e3a9..c9143c7 100644 --- a/src/rpn2solr.c +++ b/src/rpn2solr.c @@ -1,5 +1,5 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2013 Index Data + * Copyright (C) Index Data * See the file LICENSE for details. */ /** @@ -69,15 +69,15 @@ static const char *lookup_relation_index_from_attr(Z_AttributeList *attributes) { /* Unsure on whether this is the relation attribute constants? */ case Z_ProximityOperator_Prox_lessThan: - return 0; + return "<"; case Z_ProximityOperator_Prox_lessThanOrEqual: - return 0; + return "le"; case Z_ProximityOperator_Prox_equal: return ":"; case Z_ProximityOperator_Prox_greaterThanOrEqual: - return 0; + return "ge"; case Z_ProximityOperator_Prox_greaterThan: - return 0; + return ">"; case Z_ProximityOperator_Prox_notEqual: return 0; case 100: @@ -104,88 +104,84 @@ static const char *lookup_relation_index_from_attr(Z_AttributeList *attributes) return ":"; } -struct solr_attr { - const char *index; - const char *relation; - const char *term; - int is_range; - const char *begin; - const char *close; -}; +static int check_range(solr_transform_t ct, Z_Complex *q, + Z_AttributesPlusTerm **p_apt1, + Z_AttributesPlusTerm **p_apt2) +{ + Z_Operator *op = q->roperator; + if (op->which == Z_Operator_and && + q->s1->which == Z_RPNStructure_simple && + q->s2->which == Z_RPNStructure_simple && + q->s1->u.simple->which == Z_Operand_APT && + q->s2->u.simple->which == Z_Operand_APT) + { + Z_AttributesPlusTerm *apt1 = q->s1->u.simple->u.attributesPlusTerm; + Z_AttributesPlusTerm *apt2 = q->s2->u.simple->u.attributesPlusTerm; + const char *i1 = solr_lookup_reverse(ct, "index.", apt1->attributes); + const char *i2 = solr_lookup_reverse(ct, "index.", apt2->attributes); + const char *rel1 = solr_lookup_reverse(ct, "relation.", + apt1->attributes); + const char *rel2 = solr_lookup_reverse(ct, "relation.", + apt2->attributes); + if (!rel1) + rel1 = lookup_relation_index_from_attr(apt1->attributes); + if (!rel2) + rel2 = lookup_relation_index_from_attr(apt2->attributes); + if (!i1) + i1 = lookup_index_from_string_attr(apt1->attributes); + if (!i2) + i2 = lookup_index_from_string_attr(apt2->attributes); + if (i1 && i2 && !strcmp(i1, i2) && rel1 && rel2) + { + if ((rel1[0] == '>' || rel1[0] == 'g') && + (rel2[0] == '<' || rel2[0] == 'l')) + { + *p_apt1 = apt1; + *p_apt2 = apt2; + return 1; + } + if ((rel2[0] == '>' || rel2[0] == 'g') && + (rel1[0] == '<' || rel1[0] == 'l')) + { + *p_apt1 = apt2; + *p_apt2 = apt1; + return 1; + } + } + } + return 0; +} static int rpn2solr_attr(solr_transform_t ct, - Z_AttributeList *attributes, WRBUF w, struct solr_attr *solr_attr) + Z_AttributeList *attributes, WRBUF w) { - const char *relation = solr_lookup_reverse(ct, "relation.", attributes); - const char *index = solr_lookup_reverse(ct, "index.", attributes); + const char *index = solr_lookup_reverse(ct, "index.", attributes); const char *structure = solr_lookup_reverse(ct, "structure.", attributes); - /* Assume this is not a range */ - solr_attr->is_range = 0; /* if transform (properties) do not match, we'll just use a USE string attribute (bug #2978) */ if (!index) index = lookup_index_from_string_attr(attributes); - - /* Attempt to fix bug #2978: Look for a relation attribute */ - if (!relation) - relation = lookup_relation_index_from_attr(attributes); - if (!index) { - solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 0); + solr_transform_set_error(ct, + YAZ_BIB1_UNSUPP_USE_ATTRIBUTE, 0); return -1; } /* for serverChoice we omit index+relation+structure */ if (strcmp(index, "cql.serverChoice")) { - solr_attr->index = index; - if (relation) - { - if (!strcmp(relation, "exact")) { - /* TODO Exact match does not exists in SOLR. Need to use specific field type */ - relation = ":"; - } - else if (!strcmp(relation, "eq")) { - relation = ":"; - } - else if (!strcmp(relation, "<")) { - solr_attr->is_range = 1; - solr_attr->begin = "[* TO "; - solr_attr->close = "}"; - } - else if (!strcmp(relation, "le")) { - solr_attr->is_range = 2; - solr_attr->begin = "[* TO "; - solr_attr->close = "]"; - } - else if (!strcmp(relation, "ge")) { - solr_attr->is_range = 3; - solr_attr->begin = "["; - solr_attr->close = " TO *]"; - } - else if (!strcmp(relation, ">")) { - solr_attr->is_range = 4; - solr_attr->begin = "{"; - solr_attr->close = " TO *]"; - } - solr_attr->relation = relation; - } - // TODO is this valid for Solr? - solr_attr->term = 0; + wrbuf_puts(w, index); + wrbuf_puts(w, ":"); if (structure) { if (strcmp(structure, "*")) { - wrbuf_puts(w, "/"); - wrbuf_puts(w, structure); - wrbuf_puts(w, " "); - solr_attr->index = 0; + wrbuf_puts(w, "/"); + wrbuf_puts(w, structure); + wrbuf_puts(w, " "); } - } } - else - solr_attr->index = 0; return 0; } @@ -215,230 +211,201 @@ static Odr_int get_truncation(Z_AttributesPlusTerm *apt) #define SOLR_SPECIAL "+-&|!(){}[]^\"~*?:\\" -static int rpn2solr_simple(solr_transform_t ct, - Z_Operand *q, WRBUF w, struct solr_attr *solr_attr) +static int emit_term(solr_transform_t ct, WRBUF w, Z_Term *term, Odr_int trunc) { - int ret = 0; - if (q->which != Z_Operand_APT) + size_t lterm = 0; + const char *sterm = 0; + switch (term->which) { - ret = -1; - solr_transform_set_error(ct, YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM, 0); + case Z_Term_general: + lterm = term->u.general->len; + sterm = (const char *) term->u.general->buf; + break; + case Z_Term_numeric: + wrbuf_printf(w, ODR_INT_PRINTF, *term->u.numeric); + break; + case Z_Term_characterString: + sterm = term->u.characterString; + lterm = strlen(sterm); + break; + default: + solr_transform_set_error(ct, YAZ_BIB1_TERM_TYPE_UNSUPP, 0); + return -1; } - else - { - Z_AttributesPlusTerm *apt = q->u.attributesPlusTerm; - Z_Term *term = apt->term; - const char *sterm = 0; - size_t lterm = 0; - Odr_int trunc = get_truncation(apt); - - wrbuf_rewind(w); - - ret = rpn2solr_attr(ct, apt->attributes, w, solr_attr); - if (trunc == 0 || trunc == 1 || trunc == 100 || trunc == 104) - ; - else - { - solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, 0); - return -1; - } - switch (term->which) - { - case Z_Term_general: - lterm = term->u.general->len; - sterm = (const char *) term->u.general->buf; - break; - case Z_Term_numeric: - wrbuf_printf(w, ODR_INT_PRINTF, *term->u.numeric); - break; - case Z_Term_characterString: - sterm = term->u.characterString; - lterm = strlen(sterm); - break; - default: - ret = -1; - solr_transform_set_error(ct, YAZ_BIB1_TERM_TYPE_UNSUPP, 0); - } - - if (sterm) + if (sterm) + { + size_t i; + int must_quote = 0; + + for (i = 0 ; i < lterm; i++) + if (sterm[i] == ' ') + must_quote = 1; + if (must_quote) + wrbuf_puts(w, "\""); + if (trunc == 2 || trunc == 3) + wrbuf_puts(w, "*"); + for (i = 0 ; i < lterm; i++) { - size_t i; - int must_quote = 0; - - for (i = 0 ; i < lterm; i++) - if (sterm[i] == ' ') - must_quote = 1; - if (must_quote) - wrbuf_puts(w, "\""); - for (i = 0 ; i < lterm; i++) + if (sterm[i] == '\\' && i < lterm - 1) { - if (sterm[i] == '\\' && i < lterm - 1) - { - i++; - if (strchr(SOLR_SPECIAL, sterm[i])) - wrbuf_putc(w, '\\'); - wrbuf_putc(w, sterm[i]); - } - else if (sterm[i] == '?' && trunc == 104) - { - wrbuf_putc(w, '*'); - } - else if (sterm[i] == '#' && trunc == 104) - { - wrbuf_putc(w, '?'); - } - else if (strchr(SOLR_SPECIAL, sterm[i])) - { + i++; + if (strchr(SOLR_SPECIAL, sterm[i])) wrbuf_putc(w, '\\'); - wrbuf_putc(w, sterm[i]); - } - else - wrbuf_putc(w, sterm[i]); + wrbuf_putc(w, sterm[i]); } - if (trunc == 1) - wrbuf_puts(w, "*"); - if (must_quote) - wrbuf_puts(w, "\""); - } - if (ret == 0) { - solr_attr->term = wrbuf_cstr(w); + else if (sterm[i] == '?' && trunc == 104) + { + wrbuf_putc(w, '*'); + } + else if (sterm[i] == '#' && trunc == 104) + { + wrbuf_putc(w, '?'); + } + else if (strchr(SOLR_SPECIAL, sterm[i])) + { + wrbuf_putc(w, '\\'); + wrbuf_putc(w, sterm[i]); + } + else + wrbuf_putc(w, sterm[i]); } - - } - return ret; -}; - -static int solr_write_range(void (*pr)(const char *buf, void *client_data), - void *client_data, - struct solr_attr *solr_attr_left, - struct solr_attr *solr_attr_right) -{ - pr(solr_attr_left->index, client_data); - pr(":", client_data); - pr(solr_attr_left->begin, client_data); - pr(solr_attr_left->term, client_data); - pr(" TO ", client_data); - pr(solr_attr_right->term, client_data); - pr(solr_attr_right->close, client_data); - return 0; -}; - -static int solr_write_structure(void (*pr)(const char *buf, void *client_data), - void *client_data, - struct solr_attr *solr_attr) -{ - if (solr_attr->index) { - pr(solr_attr->index, client_data); - pr(":", client_data); + if (trunc == 1 || trunc == 3) + wrbuf_puts(w, "*"); + if (must_quote) + wrbuf_puts(w, "\""); } - if (solr_attr->is_range) { - pr(solr_attr->begin, client_data); - pr(solr_attr->term, client_data); - pr(solr_attr->close, client_data); - } - else if (solr_attr->term) - pr(solr_attr->term, client_data); - return 0; -}; - - - -static int solr_write_and_or_range(void (*pr)(const char *buf, void *client_data), - void *client_data, - struct solr_attr *solr_attr_left, - struct solr_attr *solr_attr_right) -{ - if (solr_attr_left->is_range && - solr_attr_right->is_range && - !strcmp(solr_attr_left->index, solr_attr_right->index)) - { - if (solr_attr_left->is_range >= 3 && solr_attr_right->is_range <= 2) - return solr_write_range(pr, client_data, solr_attr_left, solr_attr_right); - else if (solr_attr_left->is_range <= 2 && solr_attr_right->is_range >= 2) - return solr_write_range(pr, client_data, solr_attr_right, solr_attr_left); - } - solr_write_structure(pr, client_data, solr_attr_left); - pr(" AND ", client_data); - solr_write_structure(pr, client_data, solr_attr_right); return 0; } -static void solr_attr_init(struct solr_attr *solr_attr) { - solr_attr->index = 0; - solr_attr->relation = 0; - solr_attr->is_range = 0; - solr_attr->term = 0; -} +static int rpn2solr_simple(solr_transform_t ct, + void (*pr)(const char *buf, void *client_data), + void *client_data, + Z_AttributesPlusTerm *apt, WRBUF w, + Z_AttributesPlusTerm *apt2) + { + int ret = 0; + Z_Term *term = apt->term; + Odr_int trunc = get_truncation(apt); + const char *relation2 = 0; + const char *relation1 = solr_lookup_reverse(ct, "relation.", + apt->attributes); + /* Attempt to fix bug #2978: Look for a relation attribute */ + if (!relation1) + relation1 = lookup_relation_index_from_attr(apt->attributes); + if (!relation1) + { + solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_RELATION_ATTRIBUTE, 0); + return -1; + } + if (apt2) + { + relation2 = solr_lookup_reverse(ct, "relation.", + apt2->attributes); + if (!relation2) + relation2 = lookup_relation_index_from_attr(apt2->attributes); + } + wrbuf_rewind(w); + ret = rpn2solr_attr(ct, apt->attributes, w); + if (ret) + return ret; + if ((trunc >= 0 && trunc <= 3) || trunc == 100 || trunc == 104) + ; + else + { + solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE, 0); + return -1; + } + + if (!relation1) + ret = emit_term(ct, w, term, trunc); + else if (relation1[0] == '<' || relation1[0] == 'l') + { + wrbuf_puts(w, "[* TO "); + ret = emit_term(ct, w, term, trunc); + if (!strcmp(relation1, "le") || !strcmp(relation1, "<=")) + wrbuf_puts(w, "]"); + else + wrbuf_puts(w, "}"); + } + else if (relation1[0] == '>' || relation1[0] == 'g') + { + if (!strcmp(relation1, ">=") || !strcmp(relation1, "ge")) + wrbuf_puts(w, "["); + else + wrbuf_puts(w, "{"); + ret = emit_term(ct, w, term, trunc); + wrbuf_puts(w, " TO "); + if (apt2) + { + emit_term(ct, w, apt2->term, 0); + if (!relation2 || !strcmp(relation2, "<=") || + !strcmp(relation2, "le")) + wrbuf_puts(w, "]"); + else + wrbuf_puts(w, "}"); + } + else + wrbuf_puts(w, "*]"); + } + else + ret = emit_term(ct, w, term, trunc); + if (ret == 0) + pr(wrbuf_cstr(w), client_data); + return ret; + } static int rpn2solr_structure(solr_transform_t ct, void (*pr)(const char *buf, void *client_data), - void *client_data, + void *client_data, Z_RPNStructure *q, int nested, - WRBUF wa, struct solr_attr *solr_attr) + WRBUF w) { - if (q->which == Z_RPNStructure_simple) { - solr_attr_init(solr_attr); - return rpn2solr_simple(ct, q->u.simple, wa, solr_attr); + if (q->which == Z_RPNStructure_simple) + { + if (q->u.simple->which != Z_Operand_APT) + { + solr_transform_set_error( + ct, YAZ_BIB1_RESULT_SET_UNSUPP_AS_A_SEARCH_TERM, 0); + return -1; + } + else + return rpn2solr_simple(ct, pr, client_data, + q->u.simple->u.attributesPlusTerm, w, 0); } else { Z_Operator *op = q->u.complex->roperator; + Z_AttributesPlusTerm *apt1, *apt2; int r; + if (check_range(ct, q->u.complex, &apt1, &apt2)) + return rpn2solr_simple(ct, pr, client_data, apt1, w, apt2); if (nested) pr("(", client_data); - struct solr_attr solr_attr_left; - solr_attr_init(&solr_attr_left); - WRBUF w_left = wrbuf_alloc(); - r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s1, 1, w_left, &solr_attr_left); - - - if (r) { - wrbuf_destroy(w_left); - return r; - } - struct solr_attr solr_attr_right; - solr_attr_init(&solr_attr_right); - WRBUF w_right = wrbuf_alloc(); - - r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s2, 1, w_right, &solr_attr_right); - if (r) { - wrbuf_destroy(w_left); - wrbuf_destroy(w_right); + r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s1, 1, w); + if (r) return r; - } - switch(op->which) { case Z_Operator_and: - solr_write_and_or_range(pr, client_data, &solr_attr_left, &solr_attr_right); + pr(" AND ", client_data); break; case Z_Operator_or: - solr_write_structure(pr, client_data, &solr_attr_left); pr(" OR ", client_data); - solr_write_structure(pr, client_data, &solr_attr_right); break; case Z_Operator_and_not: - solr_write_structure(pr, client_data, &solr_attr_left); pr(" AND NOT ", client_data); - solr_write_structure(pr, client_data, &solr_attr_right); break; case Z_Operator_prox: solr_transform_set_error(ct, YAZ_BIB1_UNSUPP_SEARCH, 0); - wrbuf_destroy(w_left); - wrbuf_destroy(w_right); return -1; } - + r = rpn2solr_structure(ct, pr, client_data, q->u.complex->s2, 1, w); if (nested) pr(")", client_data); - - solr_attr_init(solr_attr); - wrbuf_destroy(w_left); - wrbuf_destroy(w_right); return r; } } @@ -451,10 +418,7 @@ int solr_transform_rpn2solr_stream(solr_transform_t ct, int r; WRBUF w = wrbuf_alloc(); solr_transform_set_error(ct, 0, 0); - struct solr_attr solr_attr; - solr_attr_init(&solr_attr); - r = rpn2solr_structure(ct, pr, client_data, q->RPNStructure, 0, w, &solr_attr); - solr_write_structure(pr, client_data, &solr_attr); + r = rpn2solr_structure(ct, pr, client_data, q->RPNStructure, 0, w); wrbuf_destroy(w); return r; }