From 786269f1247effe4194494c834b5d7043417d0d5 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 2 Sep 2002 13:59:07 +0000 Subject: [PATCH] New PQF API. Old API preserved. --- CHANGELOG | 7 +- client/client.c | 36 ++++++--- include/yaz/pquery.h | 70 ++++++++--------- zutil/pquery.c | 205 ++++++++++++++++++++++++++++++++++++++++++-------- 4 files changed, 243 insertions(+), 75 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 378672b..6f5ef26 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,9 +1,14 @@ Possible compatibility problems with earlier versions marked with '*'. +PQF parser rejects bad queries - including those with extra +characters in them. PQF parser used to silently ignore that. +Cleaner API for PQF added, which allows you to get detailed +error information for bad queries (see yaz/pquery.h). + Implemented mini iconv library supporting conversions between UTF-8, UCS4, UCS4LE and ISO-8859-1. Implemented in util/siconv.c. -Removed XML reader (d1_expat.c). It's part of Zebra instead. +* Removed XML reader (d1_expat.c). It's part of Zebra instead. --- 1.8.9 2002/08/20 diff --git a/client/client.c b/client/client.c index 7ad496c..ad98a3a 100644 --- a/client/client.c +++ b/client/client.c @@ -2,7 +2,7 @@ * Copyright (c) 1995-2002, Index Data * See the file LICENSE for details. 
* - * $Id: client.c,v 1.165 2002-08-30 09:06:42 adam Exp $ + * $Id: client.c,v 1.166 2002-09-02 13:59:07 adam Exp $ */ #include @@ -898,6 +898,7 @@ static int send_searchRequest(char *arg) char setstring[100]; Z_RPNQuery *RPNquery; Odr_oct ccl_query; + YAZ_PQF_Parser pqf_parser; if (queryType == QueryType_CCL2RPN) { @@ -951,12 +952,20 @@ static int send_searchRequest(char *arg) { case QueryType_Prefix: query.which = Z_Query_type_1; - RPNquery = p_query_rpn (out, protocol, arg); + pqf_parser = yaz_pqf_create (); + RPNquery = yaz_pqf_parse (pqf_parser, out, arg); if (!RPNquery) { - printf("Prefix query error\n"); + const char *pqf_msg; + size_t off; + int code = yaz_pqf_error (pqf_parser, &pqf_msg, &off); + printf("%*s^\n", (int) off + 4, ""); /* '*' field width must be an int; off is size_t */ + printf("Prefix query error: %s (code %d)\n", pqf_msg, code); + + yaz_pqf_destroy (pqf_parser); return 0; } + yaz_pqf_destroy (pqf_parser); query.u.type_1 = RPNquery; break; case QueryType_CCL: @@ -1852,7 +1861,6 @@ int send_scanrequest(const char *query, int pp, int num, const char *term) { Z_APDU *apdu = zget_APDU(out, Z_APDU_scanRequest); Z_ScanRequest *req = apdu->u.scanRequest; - int use_rpn = 1; int oid[OID_SIZE]; if (queryType == QueryType_CCL2RPN) @@ -1867,7 +1875,6 @@ int send_scanrequest(const char *query, int pp, int num, const char *term) printf("CCL ERROR: %s\n", ccl_err_msg(error)); return -1; } - use_rpn = 0; bib1.proto = PROTO_Z3950; bib1.oclass = CLASS_ATTSET; bib1.value = VAL_BIB1; @@ -1879,11 +1886,22 @@ int send_scanrequest(const char *query, int pp, int num, const char *term) } ccl_rpn_delete (rpn); } - if (use_rpn && !(req->termListAndStartPoint = - p_query_scan(out, protocol, &req->attributeSet, query))) + else { - printf("Prefix query error\n"); - return -1; + YAZ_PQF_Parser pqf_parser = yaz_pqf_create (); + + if (!(req->termListAndStartPoint = + yaz_pqf_scan(pqf_parser, out, &req->attributeSet, query))) + { + const char *pqf_msg; + size_t off; + int code = yaz_pqf_error (pqf_parser, &pqf_msg, &off); + 
printf("%*s^\n", (int) off + 7, ""); /* '*' field width must be an int; off is size_t */ + printf("Prefix query error: %s (code %d)\n", pqf_msg, code); + yaz_pqf_destroy (pqf_parser); + return -1; + } + yaz_pqf_destroy (pqf_parser); } if (term && *term) { diff --git a/include/yaz/pquery.h b/include/yaz/pquery.h index b267518..5237b7e 100644 --- a/include/yaz/pquery.h +++ b/include/yaz/pquery.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1995-2000, Index Data. + * Copyright (c) 1995-2002, Index Data. * * Permission to use, copy, modify, distribute, and sell this software and * its documentation, in whole or in part, for any purpose, is hereby granted, @@ -23,39 +23,7 @@ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. * - * $Log: pquery.h,v $ - * Revision 1.2 2000-02-28 11:20:06 adam - * Using autoconf. New definitions: YAZ_BEGIN_CDECL/YAZ_END_CDECL. - * - * Revision 1.1 1999/11/30 13:47:11 adam - * Improved installation. Moved header files to include/yaz. - * - * Revision 1.8 1997/09/01 08:49:50 adam - * New windows NT/95 port using MSV5.0. To export DLL functions the - * YAZ_EXPORT modifier was added. Defined in yconfig.h. - * - * Revision 1.7 1997/05/14 06:53:43 adam - * C++ support. - * - * Revision 1.6 1996/08/12 14:09:24 adam - * Default prefix query attribute set defined by using p_query_attset. - * - * Revision 1.5 1996/03/15 11:01:46 adam - * Extra argument to p_query_rpn: protocol. - * Extra arguments to p_query_scan: protocol and attributeSet. - * - * Revision 1.4 1995/09/29 17:12:05 quinn - * Smallish - * - * Revision 1.3 1995/09/27 15:02:49 quinn - * Modified function heads & prototypes. - * - * Revision 1.2 1995/05/26 08:56:05 adam - * New function: p_query_scan. - * - * Revision 1.1 1995/05/22 15:31:05 adam - * New function, p_query_rpn, to convert from prefix (ascii) to rpn (asn). 
- * + * $Id: pquery.h,v 1.3 2002-09-02 13:59:07 adam Exp $ */ #ifndef PQUERY_H @@ -66,12 +34,46 @@ YAZ_BEGIN_CDECL +typedef struct yaz_pqf_parser *YAZ_PQF_Parser; + YAZ_EXPORT Z_RPNQuery *p_query_rpn (ODR o, oid_proto proto, const char *qbuf); YAZ_EXPORT Z_AttributesPlusTerm *p_query_scan (ODR o, oid_proto proto, Odr_oid **attributeSetP, const char *qbuf); YAZ_EXPORT int p_query_attset (const char *arg); +YAZ_EXPORT YAZ_PQF_Parser yaz_pqf_create (void); +YAZ_EXPORT Z_RPNQuery *yaz_pqf_parse (YAZ_PQF_Parser p, ODR o, + const char *qbuf); +YAZ_EXPORT Z_AttributesPlusTerm *yaz_pqf_scan (YAZ_PQF_Parser p, ODR o, + Odr_oid **attributeSetId, + const char *qbuf); +YAZ_EXPORT void yaz_pqf_destroy (YAZ_PQF_Parser p); + +YAZ_EXPORT int yaz_pqf_error (YAZ_PQF_Parser p, const char **msg, size_t *off); + + +/* no error */ +#define YAZ_PQF_ERROR_NONE 0 + +/* extra token (end of query expected) */ +#define YAZ_PQF_ERROR_EXTRA 1 + +/* missing token (at least one token expected) */ +#define YAZ_PQF_ERROR_MISSING 2 + +/* bad attribute set (for @attr and @attrset) */ +#define YAZ_PQF_ERROR_ATTSET 3 + +/* too many items (limit reached - too many attributes, etc) */ +#define YAZ_PQF_ERROR_TOOMANY 4 + +/* bad format of attribute (missing =) */ +#define YAZ_PQF_ERROR_BADATTR 5 + +/* internal failure */ +#define YAZ_PQF_ERROR_INTERNAL 6 + YAZ_END_CDECL #endif diff --git a/zutil/pquery.c b/zutil/pquery.c index 6fd0fc0..2ced379 100644 --- a/zutil/pquery.c +++ b/zutil/pquery.c @@ -2,7 +2,7 @@ * Copyright (c) 1995-2002, Index Data. * See the file LICENSE for details. 
* - * $Id: pquery.c,v 1.16 2002-07-25 12:48:39 adam Exp $ + * $Id: pquery.c,v 1.17 2002-09-02 13:59:07 adam Exp $ */ #include @@ -16,8 +16,9 @@ static oid_value p_query_dfset = VAL_NONE; -struct lex_info { +struct yaz_pqf_parser { const char *query_buf; + const char *query_ptr; const char *lex_buf; size_t lex_len; int query_look; @@ -25,14 +26,16 @@ struct lex_info { char *right_sep; int escape_char; int term_type; + int error; }; -static Z_RPNStructure *rpn_structure (struct lex_info *li, ODR o, oid_proto, +static Z_RPNStructure *rpn_structure (struct yaz_pqf_parser *li, ODR o, + oid_proto, int num_attr, int max_attr, int *attr_list, char **attr_clist, oid_value *attr_set); -static enum oid_value query_oid_getvalbyname (struct lex_info *li) +static enum oid_value query_oid_getvalbyname (struct yaz_pqf_parser *li) { enum oid_value value; char buf[32]; @@ -45,20 +48,21 @@ static enum oid_value query_oid_getvalbyname (struct lex_info *li) return value; } -static int compare_term (struct lex_info *li, const char *src, size_t off) +static int compare_term (struct yaz_pqf_parser *li, const char *src, + size_t off) { size_t len=strlen(src); - + if (li->lex_len == len+off && !memcmp (li->lex_buf+off, src, len-off)) return 1; return 0; } -static int query_token (struct lex_info *li) +static int query_token (struct yaz_pqf_parser *li) { int sep_char = ' '; const char *sep_match; - const char **qptr = &li->query_buf; + const char **qptr = &li->query_ptr; while (**qptr == ' ') (*qptr)++; @@ -113,7 +117,7 @@ static int query_token (struct lex_info *li) return 't'; } -static int lex (struct lex_info *li) +static int lex (struct yaz_pqf_parser *li) { return li->query_look = query_token (li); } @@ -181,7 +185,7 @@ static int escape_string(char *out_buf, const char *in, int len) return out - out_buf; } -static int p_query_parse_attr(struct lex_info *li, ODR o, +static int p_query_parse_attr(struct yaz_pqf_parser *li, ODR o, int num_attr, int *attr_list, char **attr_clist, 
oid_value *attr_set) { @@ -191,11 +195,20 @@ static int p_query_parse_attr(struct lex_info *li, ODR o, { attr_set[num_attr] = query_oid_getvalbyname (li); if (attr_set[num_attr] == VAL_NONE) + { + li->error = YAZ_PQF_ERROR_ATTSET; return 0; - lex (li); - + } + if (!lex (li)) + { + li->error = YAZ_PQF_ERROR_MISSING; + return 0; + } if (!(cp = strchr (li->lex_buf, '='))) + { + li->error = YAZ_PQF_ERROR_BADATTR; return 0; + } } else { @@ -222,7 +235,7 @@ static int p_query_parse_attr(struct lex_info *li, ODR o, return 1; } -static Z_AttributesPlusTerm *rpn_term (struct lex_info *li, ODR o, +static Z_AttributesPlusTerm *rpn_term (struct yaz_pqf_parser *li, ODR o, oid_proto proto, int num_attr, int *attr_list, char **attr_clist, oid_value *attr_set) @@ -329,7 +342,7 @@ static Z_AttributesPlusTerm *rpn_term (struct lex_info *li, ODR o, return zapt; } -static Z_Operand *rpn_simple (struct lex_info *li, ODR o, oid_proto proto, +static Z_Operand *rpn_simple (struct yaz_pqf_parser *li, ODR o, oid_proto proto, int num_attr, int *attr_list, char **attr_clist, oid_value *attr_set) { @@ -349,7 +362,10 @@ static Z_Operand *rpn_simple (struct lex_info *li, ODR o, oid_proto proto, case 's': lex (li); if (!li->query_look) + { + li->error = YAZ_PQF_ERROR_MISSING; return 0; + } zo->which = Z_Operand_resultSetId; zo->u.resultSetId = (char *)odr_malloc (o, li->lex_len+1); memcpy (zo->u.resultSetId, li->lex_buf, li->lex_len); @@ -357,17 +373,23 @@ static Z_Operand *rpn_simple (struct lex_info *li, ODR o, oid_proto proto, lex (li); break; default: + /* we're only called if one of the above types is seen, so + this shouldn't happen */ + li->error = YAZ_PQF_ERROR_INTERNAL; return 0; } return zo; } -static Z_ProximityOperator *rpn_proximity (struct lex_info *li, ODR o) +static Z_ProximityOperator *rpn_proximity (struct yaz_pqf_parser *li, ODR o) { Z_ProximityOperator *p = (Z_ProximityOperator *)odr_malloc (o, sizeof(*p)); if (!lex (li)) + { + li->error = YAZ_PQF_ERROR_MISSING; return NULL; 
+ } if (*li->lex_buf == '1') { p->exclusion = (int *)odr_malloc (o, sizeof(*p->exclusion)); @@ -382,22 +404,34 @@ static Z_ProximityOperator *rpn_proximity (struct lex_info *li, ODR o) p->exclusion = NULL; if (!lex (li)) + { + li->error = YAZ_PQF_ERROR_MISSING; return NULL; + } p->distance = (int *)odr_malloc (o, sizeof(*p->distance)); *p->distance = atoi (li->lex_buf); if (!lex (li)) + { + li->error = YAZ_PQF_ERROR_MISSING; return NULL; + } p->ordered = (int *)odr_malloc (o, sizeof(*p->ordered)); *p->ordered = atoi (li->lex_buf); if (!lex (li)) + { + li->error = YAZ_PQF_ERROR_MISSING; return NULL; + } p->relationType = (int *)odr_malloc (o, sizeof(*p->relationType)); *p->relationType = atoi (li->lex_buf); if (!lex (li)) + { + li->error = YAZ_PQF_ERROR_MISSING; return NULL; + } if (*li->lex_buf == 'k') p->which = 0; else if (*li->lex_buf == 'p') @@ -406,14 +440,17 @@ static Z_ProximityOperator *rpn_proximity (struct lex_info *li, ODR o) p->which = atoi (li->lex_buf); if (!lex (li)) + { + li->error = YAZ_PQF_ERROR_MISSING; return NULL; + } p->which = Z_ProximityOperator_known; p->u.known = (int *)odr_malloc (o, sizeof(*p->u.known)); *p->u.known = atoi (li->lex_buf); return p; } -static Z_Complex *rpn_complex (struct lex_info *li, ODR o, oid_proto proto, +static Z_Complex *rpn_complex (struct yaz_pqf_parser *li, ODR o, oid_proto proto, int num_attr, int max_attr, int *attr_list, char **attr_clist, oid_value *attr_set) @@ -445,6 +482,9 @@ static Z_Complex *rpn_complex (struct lex_info *li, ODR o, oid_proto proto, return NULL; break; default: + /* we're only called if one of the above types is seen, so + this shouldn't happen */ + li->error = YAZ_PQF_ERROR_INTERNAL; return NULL; } lex (li); @@ -459,7 +499,7 @@ static Z_Complex *rpn_complex (struct lex_info *li, ODR o, oid_proto proto, return zc; } -static void rpn_term_type (struct lex_info *li, ODR o) +static void rpn_term_type (struct yaz_pqf_parser *li, ODR o) { if (!li->query_look) return ; @@ -478,7 +518,7 @@ 
static void rpn_term_type (struct lex_info *li, ODR o) lex (li); } -static Z_RPNStructure *rpn_structure (struct lex_info *li, ODR o, +static Z_RPNStructure *rpn_structure (struct yaz_pqf_parser *li, ODR o, oid_proto proto, int num_attr, int max_attr, int *attr_list, @@ -511,9 +551,15 @@ static Z_RPNStructure *rpn_structure (struct lex_info *li, ODR o, case 'l': lex (li); if (!li->query_look) - return NULL; + { + li->error = YAZ_PQF_ERROR_MISSING; + return 0; + } if (num_attr >= max_attr) - return NULL; + { + li->error = YAZ_PQF_ERROR_TOOMANY; + return 0; + } if (!p_query_parse_attr(li, o, num_attr, attr_list, attr_clist, attr_set)) return 0; @@ -529,12 +575,13 @@ static Z_RPNStructure *rpn_structure (struct lex_info *li, ODR o, rpn_structure (li, o, proto, num_attr, max_attr, attr_list, attr_clist, attr_set); case 0: /* operator/operand expected! */ - return NULL; + li->error = YAZ_PQF_ERROR_MISSING; + return 0; } return sz; } -Z_RPNQuery *p_query_rpn_mk (ODR o, struct lex_info *li, oid_proto proto, +Z_RPNQuery *p_query_rpn_mk (ODR o, struct yaz_pqf_parser *li, oid_proto proto, const char *qbuf) { Z_RPNQuery *zq; @@ -550,7 +597,10 @@ Z_RPNQuery *p_query_rpn_mk (ODR o, struct lex_info *li, oid_proto proto, lex (li); topSet = query_oid_getvalbyname (li); if (topSet == VAL_NONE) + { + li->error = YAZ_PQF_ERROR_ATTSET; return NULL; + } lex (li); } @@ -562,29 +612,39 @@ Z_RPNQuery *p_query_rpn_mk (ODR o, struct lex_info *li, oid_proto proto, zq->attributeSetId = yaz_oidval_to_z3950oid(o, CLASS_ATTSET, topSet); if (!zq->attributeSetId) + { + li->error = YAZ_PQF_ERROR_ATTSET; return 0; + } if (!(zq->RPNStructure = rpn_structure (li, o, proto, 0, 512, attr_array, attr_clist, attr_set))) - return NULL; + return 0; + if (li->query_look) + { + li->error = YAZ_PQF_ERROR_EXTRA; + return 0; + } return zq; } Z_RPNQuery *p_query_rpn (ODR o, oid_proto proto, const char *qbuf) { - struct lex_info li; - + struct yaz_pqf_parser li; + + li.error = 0; li.left_sep = "{\""; li.right_sep 
= "}\""; li.escape_char = '@'; li.term_type = Z_Term_general; - li.query_buf = qbuf; + li.query_buf = li.query_ptr = qbuf; + li.lex_buf = 0; return p_query_rpn_mk (o, &li, proto, qbuf); } -Z_AttributesPlusTerm *p_query_scan_mk (struct lex_info *li, +Z_AttributesPlusTerm *p_query_scan_mk (struct yaz_pqf_parser *li, ODR o, oid_proto proto, Odr_oid **attributeSetP, const char *qbuf) @@ -595,6 +655,7 @@ Z_AttributesPlusTerm *p_query_scan_mk (struct lex_info *li, int num_attr = 0; int max_attr = 512; oid_value topSet = VAL_NONE; + Z_AttributesPlusTerm *apt; lex (li); if (li->query_look == 'r') @@ -617,9 +678,15 @@ Z_AttributesPlusTerm *p_query_scan_mk (struct lex_info *li, { lex (li); if (!li->query_look) + { + li->error = YAZ_PQF_ERROR_MISSING; return 0; + } if (num_attr >= max_attr) + { + li->error = YAZ_PQF_ERROR_TOOMANY; return 0; + } if (!p_query_parse_attr(li, o, num_attr, attr_list, attr_clist, attr_set)) return 0; @@ -635,21 +702,35 @@ Z_AttributesPlusTerm *p_query_scan_mk (struct lex_info *li, break; } if (!li->query_look) - return NULL; - return rpn_term (li, o, proto, num_attr, attr_list, attr_clist, attr_set); + { + li->error = YAZ_PQF_ERROR_MISSING; + return 0; + } + apt = rpn_term (li, o, proto, num_attr, attr_list, attr_clist, attr_set); + + lex (li); + + if (li->query_look != 0) + { + li->error = YAZ_PQF_ERROR_EXTRA; + return 0; + } + return apt; } Z_AttributesPlusTerm *p_query_scan (ODR o, oid_proto proto, Odr_oid **attributeSetP, const char *qbuf) { - struct lex_info li; + struct yaz_pqf_parser li; + li.error = 0; li.left_sep = "{\""; li.right_sep = "}\""; li.escape_char = '@'; li.term_type = Z_Term_general; - li.query_buf = qbuf; + li.query_buf = li.query_ptr = qbuf; + li.lex_buf = 0; return p_query_scan_mk (&li, o, proto, attributeSetP, qbuf); } @@ -660,3 +741,65 @@ int p_query_attset (const char *arg) return (p_query_dfset == VAL_NONE) ? 
-1 : 0; }
+/* Create a PQF parser with the default separators and '@' as escape char. */
+YAZ_PQF_Parser yaz_pqf_create (void)
+{
+    YAZ_PQF_Parser p = xmalloc (sizeof(*p));
+
+    p->error = YAZ_PQF_ERROR_NONE;
+    p->left_sep = "{\"";
+    p->right_sep = "}\"";
+    p->escape_char = '@';
+    p->term_type = Z_Term_general;
+    p->query_buf = p->query_ptr = p->lex_buf = 0; /* no query parsed yet */
+    return p;
+}
+
+/* Release a parser obtained from yaz_pqf_create. */
+void yaz_pqf_destroy (YAZ_PQF_Parser p)
+{
+    xfree (p);
+}
+
+/* Parse PQF string qbuf into an RPN query; 0 on failure (see yaz_pqf_error). */
+Z_RPNQuery *yaz_pqf_parse (YAZ_PQF_Parser p, ODR o, const char *qbuf)
+{
+    if (!p)
+        return 0;
+    p->error = YAZ_PQF_ERROR_NONE; /* forget the outcome of a previous parse */
+    p->query_buf = p->query_ptr = qbuf;
+    p->lex_buf = 0;
+    return p_query_rpn_mk (o, p, PROTO_Z3950, qbuf);
+}
+
+/* Parse a PQF scan query into attributes + term; 0 on failure. */
+Z_AttributesPlusTerm *yaz_pqf_scan (YAZ_PQF_Parser p, ODR o,
+                                    Odr_oid **attributeSetP,
+                                    const char *qbuf)
+{
+    if (!p)
+        return 0;
+    p->error = YAZ_PQF_ERROR_NONE; /* forget the outcome of a previous parse */
+    p->query_buf = p->query_ptr = qbuf;
+    p->lex_buf = 0;
+    return p_query_scan_mk (p, o, PROTO_Z3950, attributeSetP, qbuf);
+}
+
+/* Return last error code; set *msg (static text) and *off (lexer offset). */
+int yaz_pqf_error (YAZ_PQF_Parser p, const char **msg, size_t *off)
+{
+    int code = p ? p->error : YAZ_PQF_ERROR_INTERNAL; /* NULL parser: internal */
+    switch (code)
+    {
+    case YAZ_PQF_ERROR_NONE:     *msg = "no error"; break;
+    case YAZ_PQF_ERROR_EXTRA:    *msg = "extra token"; break;
+    case YAZ_PQF_ERROR_MISSING:  *msg = "missing token"; break;
+    case YAZ_PQF_ERROR_ATTSET:   *msg = "unknown attribute set"; break;
+    case YAZ_PQF_ERROR_TOOMANY:  *msg = "too many attributes"; break;
+    case YAZ_PQF_ERROR_BADATTR:  *msg = "bad attribute specification"; break;
+    case YAZ_PQF_ERROR_INTERNAL: *msg = "internal error"; break;
+    default:                     *msg = "unknown error"; break;
+    }
+    *off = (p && p->query_buf) ? (size_t) (p->query_ptr - p->query_buf) : 0;
+    return code;
+}
-- 
1.7.10.4