From: Adam Dickmeiss Date: Sat, 1 Mar 2003 22:45:37 +0000 (+0000) Subject: Predictive parser for X-Path predicates. X-Git-Tag: ZEBRA.1.3.8~53 X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=commitdiff_plain;h=b551622098261f77647eb99bbbd4b92a3285ea5f Predictive parser for X-Path predicates. Parser called zebra_parse_xpath_str instead of parse_xpath_str. --- diff --git a/data1/d1_absyn.c b/data1/d1_absyn.c index 3e26fc5..12ea5d7 100644 --- a/data1/d1_absyn.c +++ b/data1/d1_absyn.c @@ -1,4 +1,4 @@ -/* $Id: d1_absyn.c,v 1.7 2003-02-25 21:50:27 adam Exp $ +/* $Id: d1_absyn.c,v 1.8 2003-03-01 22:45:37 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -701,10 +701,11 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, cur_xpelement->dfa = dfa; #ifdef ENHANCED_XELM - cur_xpelement->xpath_len = parse_xpath_str(xpath_expr, - cur_xpelement->xpath, - data1_nmem_get(dh)); - + cur_xpelement->xpath_len = + zebra_parse_xpath_str(xpath_expr, + cur_xpelement->xpath, XPATH_STEP_COUNT, + data1_nmem_get(dh)); + /* dump_xp_steps(cur_xpelement->xpath,cur_xpelement->xpath_len); */ diff --git a/include/zebra_xpath.h b/include/zebra_xpath.h index d09bf16..fcaf727 100644 --- a/include/zebra_xpath.h +++ b/include/zebra_xpath.h @@ -27,8 +27,9 @@ struct xpath_location_step { struct xpath_predicate *predicate; }; -int parse_xpath_str(const char *xpath_string, - struct xpath_location_step *xpath, NMEM mem); +int zebra_parse_xpath_str(const char *xpath_string, + struct xpath_location_step *xpath, + int max, NMEM mem); void dump_xp_steps (struct xpath_location_step *xpath, int no); diff --git a/index/zrpn.c b/index/zrpn.c index 40815ac..3df18e1 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,4 +1,4 @@ -/* $Id: zrpn.c,v 1.129 2003-02-27 11:29:13 adam Exp $ +/* $Id: zrpn.c,v 1.130 2003-03-01 22:45:38 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data Aps @@ -1300,7 +1300,7 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, parms.temp_path = res_get (zh->res, "setTmpDir"); result = rset_create (rset_kind_temp, &parms); rsfd_result = rset_open (result, RSETF_WRITE); - + while (*more) { for (i = 1; inn = min_nn; @@ -2100,12 +2101,11 @@ struct xpath_location_step { static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, oid_value attributeSet, - struct xpath_location_step *xpath, NMEM mem) + struct xpath_location_step *xpath, int max, NMEM mem) { oid_value curAttributeSet = attributeSet; AttrType use; const char *use_string = 0; - int no = 0; attr_init (&use, zapt, 1); attr_find_ex (&use, &curAttributeSet, &use_string); @@ -2113,7 +2113,7 @@ static int parse_xpath(ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (!use_string || *use_string != '/') return -1; - return (parse_xpath_str(use_string, xpath, mem)); + return zebra_parse_xpath_str(use_string, xpath, max, mem); } @@ -2352,7 +2352,7 @@ static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt, if (sort_flag) return rpn_sort_spec (zh, zapt, attributeSet, stream, sort_sequence, rank_type); - xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, stream); + xpath_len = parse_xpath(zh, zapt, attributeSet, xpath, 10, stream); if (xpath_len >= 0) { xpath_use = 1016; diff --git a/util/xpath.c b/util/xpath.c index 43fe1d0..9007200 100644 --- a/util/xpath.c +++ b/util/xpath.c @@ -1,4 +1,4 @@ -/* $Id: xpath.c,v 1.2 2003-03-01 20:41:34 adam Exp $ +/* $Id: xpath.c,v 1.3 2003-03-01 22:45:38 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003 Index Data Aps @@ -28,123 +28,139 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include -char *get_xp_part (char **strs, NMEM mem) +static char *get_xp_part (char **strs, NMEM mem, int *literal) { - char *str = *strs; - char *res = '\0'; - char *cp = str; - char *co; - int quoted = 0; - - /* ugly */ - char *sep = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\" "; - - while (*cp == ' ') {cp++; str++;} - if (!strchr("><=] ", *cp)) sep = "><=] "; - - while (*cp && !(strchr(sep,*cp) && !quoted) && (*cp != ']')) { - if (*cp =='"') quoted = 1 - quoted; + char *cp = *strs; + char *str = 0; + char *res = 0; + + *literal = 0; + while (*cp == ' ') + cp++; + str = cp; + if (strchr("()", *cp)) + cp++; + else if (strchr("><=", *cp)) + { + while (strchr("><=", *cp)) + cp++; + } + else if (*cp == '"' || *cp == '\'') + { + int sep = *cp; + str++; cp++; - } - /* removing leading and trailing " */ - co = cp; - if (*str == '"') str++; - if (*(cp-1) == '"') cp--; - if (str < co) { + while (*cp && *cp != sep) + cp++; res = nmem_malloc(mem, cp - str + 1); - memcpy (res, str, (cp-str)); - *(res + (cp-str)) = '\0'; - *strs = co; + if ((cp - str)) + memcpy (res, str, (cp-str)); + res[cp-str] = '\0'; + if (*cp) + cp++; + *literal = 1; } - - return (res); + else + { + while (*cp && !strchr("><=()]\" ", *cp)) + cp++; + } + if (!res) + { + res = nmem_malloc(mem, cp - str + 1); + if ((cp - str)) + memcpy (res, str, (cp-str)); + res[cp-str] = '\0'; + } + *strs = cp; + return res; } +static struct xpath_predicate *get_xpath_boolean(char **pr, NMEM mem, + char **look, int *literal); -struct xpath_predicate *get_xpath_predicate(char *predicates, NMEM mem) +static struct xpath_predicate *get_xpath_relation(char **pr, NMEM mem, + char **look, int *literal) { - char *p1; - char *p2; - char *p3; - char *p4; - - struct xpath_predicate *r1; - struct xpath_predicate *r2; struct xpath_predicate *res = 0; + if (!*literal && !strcmp(*look, "(")) + { + *look = get_xp_part(pr, mem, literal); + res = get_xpath_boolean(pr, mem, look, literal); + if (!strcmp(*look, ")")) + *look = get_xp_part(pr, mem, literal); + else + res = 0; /* error */ + } + else + { + res=nmem_malloc(mem, sizeof(struct xpath_predicate)); + res->which = XPATH_PREDICATE_RELATION; + res->u.relation.name = *look; + + *look = get_xp_part(pr, mem, literal); + if (*look && !*literal && strchr("><=", **look)) + { + res->u.relation.op = *look; + + *look = get_xp_part(pr, mem, literal); + if (!*look) + return 0; /* error */ + res->u.relation.value = *look; + *look = get_xp_part(pr, mem, literal); + } + else + { + res->u.relation.op = ""; + res->u.relation.value = ""; + } + } + return res; +} + +static struct xpath_predicate *get_xpath_boolean(char **pr, NMEM mem, + char **look, int *literal) +{ + struct xpath_predicate *left = 0; - char *pr = predicates; + left = get_xpath_relation(pr, mem, look, literal); + if (!left) + return 0; - if ((p1 = get_xp_part(&pr, mem))) { - if ((p2 = get_xp_part(&pr, mem))) { - if (!strcmp (p2, "and") || !strcmp (p2, "or") || !strcmp (p2, "not")) { - r1=nmem_malloc(mem, sizeof(struct xpath_predicate)); - r1->which = XPATH_PREDICATE_RELATION; - r1->u.relation.name = p1; - r1->u.relation.op = ""; - r1->u.relation.value = ""; - - r2 = get_xpath_predicate (pr, mem); - - res = nmem_malloc(mem, sizeof(struct xpath_predicate)); - res->which = XPATH_PREDICATE_BOOLEAN; - res->u.boolean.op = p2; - res->u.boolean.left = r1; - res->u.boolean.right = r2; - - return (res); - } - - if (strchr("><=] ", *p2)) { - r1 = nmem_malloc(mem, sizeof(struct xpath_predicate)); - - r1->which = XPATH_PREDICATE_RELATION; - r1->u.relation.name = p1; - r1->u.relation.op = p2; - r1->u.relation.value = ""; - - if ((p3 = get_xp_part(&pr, mem))) { - r1->u.relation.value = p3; - } else { - /* error */ - } - } - - if ((p4 = get_xp_part(&pr, mem))) { - if (!strcmp (p4, "and") || !strcmp (p4, "or") || - !strcmp (p4, "not")) - { - - r2 = get_xpath_predicate (pr, mem); - - res = nmem_malloc(mem, sizeof(struct xpath_predicate)); - res->which = XPATH_PREDICATE_BOOLEAN; - res->u.boolean.op = p4; - res->u.boolean.left = r1; - res->u.boolean.right = r2; - return (res); - } else { - /* error */ - } - } else { - return (r1); - } - - } else { - r1 = nmem_malloc(mem, sizeof(struct xpath_predicate)); - - r1->which = XPATH_PREDICATE_RELATION; - r1->u.relation.name = p1; - r1->u.relation.op = ""; - r1->u.relation.value = ""; - - return (r1); - } + while (*look && !*literal && + (!strcmp(*look, "and") || !strcmp(*look, "or") || + !strcmp(*look, "not"))) + { + struct xpath_predicate *res, *right; + + res = nmem_malloc(mem, sizeof(struct xpath_predicate)); + res->which = XPATH_PREDICATE_BOOLEAN; + res->u.boolean.op = *look; + res->u.boolean.left = left; + + *look = get_xp_part(pr, mem, literal); /* skip the boolean name */ + right = get_xpath_relation(pr, mem, look, literal); + + res->u.boolean.right = right; + + left = res; } - return 0; + return left; } -int parse_xpath_str(const char *xpath_string, - struct xpath_location_step *xpath, NMEM mem) +static struct xpath_predicate *get_xpath_predicate(char *predicate, NMEM mem) +{ + int literal; + char **pr = &predicate; + char *look = get_xp_part(pr, mem, &literal); + + if (!look) + return 0; + return get_xpath_boolean(pr, mem, &look, &literal); +} + +int zebra_parse_xpath_str(const char *xpath_string, + struct xpath_location_step *xpath, int max, NMEM mem) { const char *cp; char *a; @@ -155,7 +171,7 @@ int parse_xpath_str(const char *xpath_string, return -1; cp = xpath_string; - while (*cp) + while (*cp && no < max) { int i = 0; while (*cp && !strchr("/[",*cp)) @@ -165,7 +181,8 @@ int parse_xpath_str(const char *xpath_string, } xpath[no].predicate = 0; xpath[no].part = nmem_malloc (mem, i+1); - memcpy (xpath[no].part, cp - i, i); + if (i) + memcpy (xpath[no].part, cp - i, i); xpath[no].part[i] = 0; if (*cp == '[') @@ -187,6 +204,12 @@ int parse_xpath_str(const char *xpath_string, break; cp++; } + +/* for debugging .. */ +#if 0 + dump_xp_steps(xpath, no); +#endif + return no; } @@ -194,7 +217,7 @@ void dump_xp_predicate (struct xpath_predicate *p) { if (p) { if (p->which == XPATH_PREDICATE_RELATION && - p->u.relation.name[0]) { + p->u.relation.name[0]) { fprintf (stderr, "%s,%s,%s", p->u.relation.name, p->u.relation.op,