From 6671d1583ced9613ab7f12bfed69fd70fd9e6b15 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Sun, 19 Feb 2006 18:44:23 +0000 Subject: [PATCH] Implement yaz_xml2query. --- include/yaz/xmlquery.h | 5 +- src/xmlquery.c | 478 +++++++++++++++++++++++++++++++++++++++++++++++- test/tstxmlquery.c | 87 +++++++-- 3 files changed, 550 insertions(+), 20 deletions(-) diff --git a/include/yaz/xmlquery.h b/include/yaz/xmlquery.h index ff5d5fb..d2ce219 100644 --- a/include/yaz/xmlquery.h +++ b/include/yaz/xmlquery.h @@ -2,7 +2,7 @@ * Copyright (C) 1995-2005, Index Data ApS * See the file LICENSE for details. * - * $Id: xmlquery.h,v 1.2 2006-01-30 14:02:06 adam Exp $ + * $Id: xmlquery.h,v 1.3 2006-02-19 18:44:23 adam Exp $ */ #ifndef YAZ_XMLQUERY_H @@ -16,6 +16,9 @@ YAZ_BEGIN_CDECL YAZ_EXPORT void yaz_query2xml(const Z_Query *q, void *docp_void); YAZ_EXPORT void yaz_rpnquery2xml(const Z_RPNQuery *rpn, void *docp_void); +YAZ_EXPORT void yaz_xml2query(const void *xmlnodep, Z_Query **query, ODR odr, + int *error_code, const char **addinfo); + YAZ_END_CDECL #endif diff --git a/src/xmlquery.c b/src/xmlquery.c index 18800f2..446dec6 100644 --- a/src/xmlquery.c +++ b/src/xmlquery.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2005, Index Data ApS * All rights reserved. * - * $Id: xmlquery.c,v 1.3 2006-02-02 15:00:58 adam Exp $ + * $Id: xmlquery.c,v 1.4 2006-02-19 18:44:23 adam Exp $ */ /** @@ -11,6 +11,7 @@ */ #include +#include #include #if HAVE_XML2 @@ -190,6 +191,7 @@ void yaz_query2xml_operator(Z_Operator *op, xmlNodePtr node) xmlNewProp(node, BAD_CAST "knownProximityUnit", BAD_CAST formstr); break; + case Z_ProximityOperator_private: default: xmlNewProp(node, BAD_CAST "privateProximityUnit", BAD_CAST "private"); @@ -289,7 +291,6 @@ void yaz_query2xml(const Z_Query *q, void *docp_void) child_node = yaz_query2xml_cql(q->u.type_104->u.cql, top_node); } } - if (child_node && type) { *docp = xmlNewDoc(BAD_CAST "1.0"); @@ -305,12 +306,483 @@ void yaz_query2xml(const Z_Query *q, void *docp_void) } } -void yaz_xml2query(const xmlNode node, Z_Query **q, ODR odr) +bool_t *boolVal(ODR odr, const char *str) { + if (*str == '\0' || strchr("0fF", *str)) + return odr_intdup(odr, 0); + return odr_intdup(odr, 1); +} + +int *intVal(ODR odr, const char *str) +{ + return odr_intdup(odr, atoi(str)); +} +void yaz_xml2query_operator(const xmlNode *ptr, Z_Operator **op, + ODR odr, int *error_code, const char **addinfo) +{ + const char *type = (const char *) + xmlGetProp((xmlNodePtr) ptr, BAD_CAST "type"); + if (!type) + { + *error_code = 1; + *addinfo = "no operator type"; + return; + } + *op = (Z_Operator*) odr_malloc(odr, sizeof(Z_Operator)); + if (!strcmp(type, "and")) + { + (*op)->which = Z_Operator_and; + (*op)->u.op_and = odr_nullval(); + } + else if (!strcmp(type, "or")) + { + (*op)->which = Z_Operator_or; + (*op)->u.op_or = odr_nullval(); + } + else if (!strcmp(type, "not")) + { + (*op)->which = Z_Operator_and_not; + (*op)->u.and_not = odr_nullval(); + } + else if (!strcmp(type, "prox")) + { + const char *atval; + Z_ProximityOperator *pop = (Z_ProximityOperator *) + odr_malloc(odr, sizeof(Z_ProximityOperator)); + + (*op)->which = Z_Operator_prox; + (*op)->u.prox = pop; + + atval = (const char *) xmlGetProp((xmlNodePtr) ptr, + BAD_CAST "exclusion"); + if (atval) + pop->exclusion = boolVal(odr, atval); + else + pop->exclusion = 0; + + atval = (const char *) xmlGetProp((xmlNodePtr) ptr, + BAD_CAST "distance"); + if (atval) + pop->distance = intVal(odr, atval); + else + pop->distance = odr_intdup(odr, 1); + + atval = (const char *) xmlGetProp((xmlNodePtr) ptr, + BAD_CAST "ordered"); + if (atval) + pop->ordered = boolVal(odr, atval); + else + pop->ordered = odr_intdup(odr, 1); + + atval = (const char *) xmlGetProp((xmlNodePtr) ptr, + BAD_CAST "relationType"); + if (atval) + pop->relationType = intVal(odr, atval); + else + pop->relationType = + odr_intdup(odr, Z_ProximityOperator_Prox_lessThanOrEqual); + + atval = (const char *) xmlGetProp((xmlNodePtr) ptr, + BAD_CAST "knownProximityUnit"); + if (atval) + { + pop->which = Z_ProximityOperator_known; + pop->u.known = intVal(odr, atval); + } + else + { + pop->which = Z_ProximityOperator_known; + pop->u.known = odr_intdup(odr, Z_ProxUnit_word); + } + + atval = (const char *) xmlGetProp((xmlNodePtr) ptr, + BAD_CAST "privateProximityUnit"); + if (atval) + { + pop->which = Z_ProximityOperator_private; + pop->u.zprivate = intVal(odr, atval); + } + } + else + { + *error_code = 1; + *addinfo = "bad operator type"; + } +} + +void yaz_xml2query_attribute_element(const xmlNode *ptr, + Z_AttributeElement **elem, ODR odr, + int *error_code, const char **addinfo) +{ + int i; + xmlChar *set = 0; + xmlChar *type = 0; + xmlChar *value = 0; + int num_values = 0; + struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + { + if (!xmlStrcmp(attr->name, BAD_CAST "set") && + attr->children && attr->children->type == XML_TEXT_NODE) + set = attr->children->content; + else if (!xmlStrcmp(attr->name, BAD_CAST "type") && + attr->children && attr->children->type == XML_TEXT_NODE) + type = attr->children->content; + else if (!xmlStrcmp(attr->name, BAD_CAST "value") && + attr->children && attr->children->type == XML_TEXT_NODE) + { + value = attr->children->content; + num_values++; + } + else + { + *error_code = 1; + *addinfo = "bad attribute for attr content"; + return; + } + } + if (!type) + { + *error_code = 1; + *addinfo = "missing type attribute for att content"; + return; + } + if (!value) + { + *error_code = 1; + *addinfo = "missing value attribute for att content"; + return; + } + + *elem = (Z_AttributeElement *) odr_malloc(odr, sizeof(**elem)); + if (set) + (*elem)->attributeSet = yaz_str_to_z3950oid(odr, CLASS_ATTSET, + (const char *)set); + else + (*elem)->attributeSet = 0; + (*elem)->attributeType = intVal(odr, (const char *) type); + + /* looks like a number ? */ + for (i = 0; value[i] && value[i] >= '0' && value[i] <= '9'; i++) + ; + if (num_values > 1 || value[i]) + { /* multiple values or string, so turn to complex attribute */ + (*elem)->which = Z_AttributeValue_complex; + (*elem)->value.complex = + (Z_ComplexAttribute*) odr_malloc(odr, sizeof(Z_ComplexAttribute)); + (*elem)->value.complex->num_list = num_values; + (*elem)->value.complex->list = (Z_StringOrNumeric **) + odr_malloc(odr, sizeof(Z_StringOrNumeric*) * num_values); + + /* second pass over attr values */ + i = 0; + for (attr = ptr->properties; attr; attr = attr->next) + { + if (!xmlStrcmp(attr->name, BAD_CAST "value") && + attr->children && attr->children->type == XML_TEXT_NODE) + { + const char *val = (const char *) attr->children->content; + assert (i < num_values); + (*elem)->value.complex->list[i] = (Z_StringOrNumeric *) + odr_malloc(odr, sizeof(Z_StringOrNumeric)); + (*elem)->value.complex->list[i]->which = + Z_StringOrNumeric_string; + (*elem)->value.complex->list[i]->u.string = + odr_strdup(odr, val); + i++; + } + } + (*elem)->value.complex->num_semanticAction = 0; + (*elem)->value.complex->semanticAction = 0; + } + else + { /* good'ld numeric value */ + (*elem)->which = Z_AttributeValue_numeric; + (*elem)->value.numeric = intVal(odr, (const char *) value); + } +} +char *strVal(const xmlNode *ptr_cdata, ODR odr) +{ + char *cdata; + int len = 0; + const xmlNode *ptr; + + for (ptr = ptr_cdata; ptr; ptr = ptr->next) + if (ptr->type == XML_TEXT_NODE) + len += xmlStrlen(ptr->content); + cdata = (char *) odr_malloc(odr, len+1); + *cdata = '\0'; + for (ptr = ptr_cdata; ptr; ptr = ptr->next) + if (ptr->type == XML_TEXT_NODE) + strcat(cdata, (const char *) ptr->content); + return cdata; } +void yaz_xml2query_term(const xmlNode *ptr, + Z_Term **term, ODR odr, + int *error_code, const char **addinfo) +{ + xmlChar *type = 0; + struct _xmlAttr *attr; + char *cdata = strVal(ptr->children, odr); + + for (attr = ptr->properties; attr; attr = attr->next) + { + if (!xmlStrcmp(attr->name, BAD_CAST "type") && + attr->children && attr->children->type == XML_TEXT_NODE) + type = attr->children->content; + else + { + *error_code = 1; + *addinfo = "bad attribute for attr content"; + return; + } + } + *term = (Z_Term *) odr_malloc(odr, sizeof(Z_Term)); + + if (!type || !xmlStrcmp(type, BAD_CAST "general")) + { + (*term)->which = Z_Term_general; + (*term)->u.general = + odr_create_Odr_oct(odr, (unsigned char *)cdata, strlen(cdata)); + } + else if (!xmlStrcmp(type, BAD_CAST "numeric")) + { + (*term)->which = Z_Term_numeric; + (*term)->u.numeric = intVal(odr, cdata); + } + else if (!xmlStrcmp(type, BAD_CAST "string")) + { + (*term)->which = Z_Term_characterString; + (*term)->u.characterString = cdata; + } + else if (!xmlStrcmp(type, BAD_CAST "oid")) + { + *error_code = 1; + *addinfo = "unhandled term type: oid"; + } + else if (!xmlStrcmp(type, BAD_CAST "dateTime")) + { + *error_code = 1; + *addinfo = "unhandled term type: dateTime"; + } + else if (!xmlStrcmp(type, BAD_CAST "integerAndUnit")) + { + *error_code = 1; + *addinfo = "unhandled term type: integerAndUnit"; + } + else if (!xmlStrcmp(type, BAD_CAST "null")) + { + (*term)->which = Z_Term_null; + (*term)->u.null = odr_nullval(); + } + else + { + *error_code = 1; + *addinfo = "unhandled term type"; + } +} + +void yaz_xml2query_apt(const xmlNode *ptr_apt, + Z_AttributesPlusTerm **zapt, ODR odr, + int *error_code, const char **addinfo) +{ + const xmlNode *ptr = ptr_apt->children; + int i, num_attr = 0; + + *zapt = (Z_AttributesPlusTerm *) + odr_malloc(odr, sizeof(Z_AttributesPlusTerm)); + + /* deal with attributes */ + (*zapt)->attributes = (Z_AttributeList*) + odr_malloc(odr, sizeof(Z_AttributeList)); + + /* how many attributes? */ + for (; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE) + { + if (!xmlStrcmp(ptr->name, BAD_CAST "attr")) + num_attr++; + else + break; + } + + /* allocate and parse for real */ + (*zapt)->attributes->num_attributes = num_attr; + (*zapt)->attributes->attributes = (Z_AttributeElement **) + odr_malloc(odr, sizeof(Z_AttributeElement*) * num_attr); + + i = 0; + ptr = ptr_apt->children; + for (; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE) + { + if (!xmlStrcmp(ptr->name, BAD_CAST "attr")) + { + yaz_xml2query_attribute_element( + ptr, &(*zapt)->attributes->attributes[i], odr, + error_code, addinfo); + i++; + } + else + break; + } + if (ptr && ptr->type == XML_ELEMENT_NODE) + { + if (!xmlStrcmp(ptr->name, BAD_CAST "term")) + { + /* deal with term */ + yaz_xml2query_term(ptr, &(*zapt)->term, odr, error_code, addinfo); + } + else + { + *error_code = 1; + *addinfo = "bad element in apt content"; + } + } + else + { + *error_code = 1; + *addinfo = "missing term node in apt content"; + } +} + +void yaz_xml2query_rset(const xmlNode *ptr, Z_ResultSetId **rset, + ODR odr, int *error_code, const char **addinfo) +{ + if (ptr->children) + { + *rset = strVal(ptr->children, odr); + } + else + { + *error_code = 1; + *addinfo = "missing rset content"; + } +} + +void yaz_xml2query_rpnstructure(const xmlNode *ptr, Z_RPNStructure **zs, + ODR odr, int *error_code, const char **addinfo) +{ + while (ptr && ptr->type != XML_ELEMENT_NODE) + ptr = ptr->next; + + if (!ptr || ptr->type != XML_ELEMENT_NODE) + { + *error_code = 1; + *addinfo = "missing rpn structure node"; + return; + } + *zs = (Z_RPNStructure *) odr_malloc(odr, sizeof(Z_RPNStructure)); + if (!xmlStrcmp(ptr->name, BAD_CAST "binary")) + { + Z_Complex *zc = odr_malloc(odr, sizeof(Z_Complex)); + + (*zs)->which = Z_RPNStructure_complex; + (*zs)->u.complex = zc; + + yaz_xml2query_operator(ptr, &zc->roperator, odr, error_code, addinfo); + + ptr = ptr->children; + while (ptr && ptr->type != XML_ELEMENT_NODE) + ptr = ptr->next; + yaz_xml2query_rpnstructure(ptr, &zc->s1, odr, error_code, addinfo); + if (ptr) + ptr = ptr->next; + while (ptr && ptr->type != XML_ELEMENT_NODE) + ptr = ptr->next; + yaz_xml2query_rpnstructure(ptr, &zc->s2, odr, error_code, addinfo); + } + else + { + Z_Operand *s = (Z_Operand *) odr_malloc(odr, sizeof(Z_Operand)); + (*zs)->which = Z_RPNStructure_simple; + (*zs)->u.simple = s; + if (!xmlStrcmp(ptr->name, BAD_CAST "apt")) + { + s->which = Z_Operand_APT; + yaz_xml2query_apt(ptr, &s->u.attributesPlusTerm, + odr, error_code, addinfo); + } + else if (!xmlStrcmp(ptr->name, BAD_CAST "rset")) + { + s->which = Z_Operand_resultSetId; + yaz_xml2query_rset(ptr, &s->u.resultSetId, + odr, error_code, addinfo); + } + else + { + *error_code = 1; + *addinfo = "bad element: expected binary, apt or rset"; + } + } +} + +void yaz_xml2query_rpn(const xmlNode *ptr, Z_RPNQuery **query, ODR odr, + int *error_code, const char **addinfo) +{ + const char *set = (const char *) + xmlGetProp((xmlNodePtr) ptr, BAD_CAST "set"); + + *query = (Z_RPNQuery*) odr_malloc(odr, sizeof(Z_RPNQuery)); + if (set) + (*query)->attributeSetId = yaz_str_to_z3950oid(odr, CLASS_ATTSET, set); + else + (*query)->attributeSetId = 0; + yaz_xml2query_rpnstructure(ptr->children, &(*query)->RPNStructure, + odr, error_code, addinfo); +} + +static void yaz_xml2query_(const xmlNode *ptr, Z_Query **query, ODR odr, + int *error_code, const char **addinfo) +{ + if (ptr && ptr->type == XML_ELEMENT_NODE && + !xmlStrcmp(ptr->name, BAD_CAST "query")) + { + const char *type = (const char *) + xmlGetProp((xmlNodePtr) ptr, BAD_CAST "type"); + *query = (Z_Query*) odr_malloc(odr, sizeof(Z_Query)); + if (!type || !strcmp(type, "rpn")) + { + (*query)->which = Z_Query_type_1; + yaz_xml2query_rpn(ptr, &(*query)->u.type_1, odr, + error_code, addinfo); + } + else if (!strcmp(type, "ccl")) + { + *error_code = 1; + *addinfo = "ccl not supported yet"; + } + else if (!strcmp(type, "z39.58")) + { + *error_code = 1; + *addinfo = "z39.58 not supported yet"; + } + else if (!strcmp(type, "cql")) + { + *error_code = 1; + *addinfo = "cql not supported yet"; + } + else + { + *error_code = 1; + *addinfo = "unsupported query type"; + } + } + else + { + *error_code = 1; + *addinfo = "missing query element"; + } +} + +void yaz_xml2query(const void *xmlnodep, Z_Query **query, ODR odr, + int *error_code, const char **addinfo) +{ + return yaz_xml2query_(xmlnodep, query, odr, error_code, addinfo); +} /* HAVE_XML2 */ #endif diff --git a/test/tstxmlquery.c b/test/tstxmlquery.c index fb0fddb..7ed5477 100644 --- a/test/tstxmlquery.c +++ b/test/tstxmlquery.c @@ -2,13 +2,14 @@ * Copyright (C) 1995-2005, Index Data ApS * See the file LICENSE for details. * - * $Id: tstxmlquery.c,v 1.8 2006-02-02 15:00:58 adam Exp $ + * $Id: tstxmlquery.c,v 1.9 2006-02-19 18:44:23 adam Exp $ */ #include #include #include +#include #include #include #include @@ -26,7 +27,8 @@ enum pqf2xml_status { XML_NO_ERROR }; -enum pqf2xml_status pqf2xml_text(const char *pqf, const char *expect_xml) +enum pqf2xml_status pqf2xml_text(const char *pqf, const char *expect_xml, + const char *expect_pqf) { YAZ_PQF_Parser parser = yaz_pqf_create(); ODR odr = odr_createmem(ODR_ENCODE); @@ -62,7 +64,30 @@ enum pqf2xml_status pqf2xml_text(const char *pqf, const char *expect_xml) if (len_out == strlen(expect_xml) && memcmp(buf_out, expect_xml, len_out) == 0) { - status = XML_MATCH; + Z_Query *query2 = 0; + int error_code = 0; + const char *addinfo = 0; + const xmlNode *root_element = xmlDocGetRootElement(doc); + ODR odr2 = odr_createmem(ODR_ENCODE); + + yaz_xml2query(root_element, &query2, odr2, + &error_code, &addinfo); + if (error_code || !query2) + status = XML_NO_MATCH; + else + { + WRBUF w = wrbuf_alloc(); + yaz_query_to_wrbuf(w, query2); + if (!expect_pqf || strcmp(expect_pqf, wrbuf_buf(w)) == 0) + status = XML_MATCH; + else + { + status = XML_NO_MATCH; + printf("Result: %s\n", wrbuf_buf(w)); + } + wrbuf_free(w, 1); + } + odr_destroy(odr2); } else { @@ -79,9 +104,9 @@ enum pqf2xml_status pqf2xml_text(const char *pqf, const char *expect_xml) return status; } -void tst() +static void tst() { - YAZ_CHECK_EQ(pqf2xml_text("@attr 1=4 bad query", ""), PQF_FAILED); + YAZ_CHECK_EQ(pqf2xml_text("@attr 1=4 bad query", "", 0), PQF_FAILED); #if HAVE_XML2 YAZ_CHECK_EQ(pqf2xml_text( "@attr 1=4 computer", @@ -89,7 +114,9 @@ void tst() "" "" "computer" - "\n"), XML_MATCH); + "\n", + "RPN: @attrset Bib-1 @attr 1=4 computer" + ), XML_MATCH); YAZ_CHECK_EQ(pqf2xml_text( "@attr 2=1 @attr 1=title computer", @@ -98,7 +125,9 @@ void tst() "" "" "computer" - "\n"), XML_MATCH); + "\n", + "RPN: @attrset Bib-1 @attr \"1=title\" @attr 2=1 computer" + ), XML_MATCH); YAZ_CHECK_EQ(pqf2xml_text( "@attr 2=1 @attr exp1 1=1 computer", @@ -107,7 +136,9 @@ void tst() "" "" "computer" - "\n"), XML_MATCH); + "\n", + "RPN: @attrset Bib-1 @attr Exp-1 1=1 @attr 2=1 computer" + ), XML_MATCH); YAZ_CHECK_EQ(pqf2xml_text( "@and a b", @@ -116,7 +147,9 @@ void tst() "" "a" "b" - "\n"), XML_MATCH); + "\n", + "RPN: @attrset Bib-1 @and a b" + ), XML_MATCH); YAZ_CHECK_EQ(pqf2xml_text( "@or @and a b c", @@ -127,13 +160,17 @@ void tst() "a" "b" "c" - "\n"), XML_MATCH); + "\n", + "RPN: @attrset Bib-1 @or @and a b c" + ), XML_MATCH); YAZ_CHECK_EQ(pqf2xml_text( "@set abe", "\n" "" - "abe\n"), XML_MATCH); + "abe\n", + "RPN: @attrset Bib-1 @set abe" + ), XML_MATCH); YAZ_CHECK_EQ(pqf2xml_text( /* exclusion, distance, ordered, relationtype, @@ -148,7 +185,9 @@ void tst() "knownProximityUnit=\"2\">" "a" "b" - "\n"), XML_MATCH); + "\n", + "RPN: @attrset Bib-1 @prox 0 3 1 2 k 2 a b" + ), XML_MATCH); YAZ_CHECK_EQ(pqf2xml_text( "@term numeric 32", @@ -156,7 +195,9 @@ void tst() "" "" "32" - "\n"), XML_MATCH); + "\n", + "RPN: @attrset Bib-1 @term numeric 32" + ), XML_MATCH); YAZ_CHECK_EQ(pqf2xml_text( "@term string computer", @@ -164,7 +205,9 @@ void tst() "" "" "computer" - "\n"), XML_MATCH); + "\n", + "RPN: @attrset Bib-1 @term string computer" + ), XML_MATCH); YAZ_CHECK_EQ(pqf2xml_text( "@term null void", @@ -172,8 +215,20 @@ void tst() "" "" "" - "\n"), XML_MATCH); - + "\n", + "RPN: @attrset Bib-1 @term null x" + ), XML_MATCH); + + YAZ_CHECK_EQ(pqf2xml_text( + "@attrset gils @attr 4=2 x", + "\n" + "" + "" + "" + "x" + "\n", + "RPN: @attrset GILS @attr 4=2 x" + ), XML_MATCH); #endif } -- 1.7.10.4