From 953117d805ff5f53b2b3c541eff5ecd6461eba29 Mon Sep 17 00:00:00 2001 From: mike Date: Fri, 25 Oct 2002 16:56:43 +0000 Subject: [PATCH] First semi-working version. Can parse: foo or (bar and "baz") No qualifiers, no relations, no modifiers, no proximity :-) --- src/org/z3950/zing/cql/CQLBooleanNode.java | 18 +++++- src/org/z3950/zing/cql/CQLParser.java | 84 +++++++++++++++------------- src/org/z3950/zing/cql/CQLTermNode.java | 39 ++++++------- 3 files changed, 82 insertions(+), 59 deletions(-) diff --git a/src/org/z3950/zing/cql/CQLBooleanNode.java b/src/org/z3950/zing/cql/CQLBooleanNode.java index c56aac4..9908395 100644 --- a/src/org/z3950/zing/cql/CQLBooleanNode.java +++ b/src/org/z3950/zing/cql/CQLBooleanNode.java @@ -1,4 +1,4 @@ -// $Id: CQLBooleanNode.java,v 1.2 2002-10-25 16:11:05 mike Exp $ +// $Id: CQLBooleanNode.java,v 1.3 2002-10-25 16:56:43 mike Exp $ package org.z3950.zing.cql; @@ -7,9 +7,23 @@ package org.z3950.zing.cql; * Represents a boolean node in a CQL parse-tree ... * ### * - * @version $Id: CQLBooleanNode.java,v 1.2 2002-10-25 16:11:05 mike Exp $ + * @version $Id: CQLBooleanNode.java,v 1.3 2002-10-25 16:56:43 mike Exp $ */ public abstract class CQLBooleanNode extends CQLNode { protected CQLNode left; protected CQLNode right; + + abstract String op(); + + String toXCQL(int level) { + return(indent(level) + "\n" + + indent(level+1) + "" + op() + "\n" + + left.toXCQL(level+1) + + right.toXCQL(level+1) + + indent(level) + "\n"); + } + + String toCQL() { + return "(" + left.toCQL() + ") " + op() + " (" + right.toCQL() + ")"; + } } diff --git a/src/org/z3950/zing/cql/CQLParser.java b/src/org/z3950/zing/cql/CQLParser.java index 7a3df72..c086cac 100644 --- a/src/org/z3950/zing/cql/CQLParser.java +++ b/src/org/z3950/zing/cql/CQLParser.java @@ -1,9 +1,8 @@ -// $Id: CQLParser.java,v 1.6 2002-10-25 16:11:05 mike Exp $ +// $Id: CQLParser.java,v 1.7 2002-10-25 16:56:43 mike Exp $ package org.z3950.zing.cql; import java.util.Properties; import java.io.InputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.StringReader; import java.io.StreamTokenizer; @@ -13,7 +12,7 @@ import java.io.StreamTokenizer; * Compiles a CQL string into a parse tree ... * ### * - * @version $Id: CQLParser.java,v 1.6 2002-10-25 16:11:05 mike Exp $ + * @version $Id: CQLParser.java,v 1.7 2002-10-25 16:56:43 mike Exp $ * @see http://zing.z3950.org/cql/index.html */ @@ -30,49 +29,44 @@ public class CQLParser { } public CQLNode parse(String cql) - throws FileNotFoundException, IOException { + throws CQLParseException, IOException { this.cql = cql; st = new StreamTokenizer(new StringReader(cql)); - // ### these settings are wrong - st.wordChars('/', '/'); - st.wordChars('0', '9'); // ### but 1 is still recognised as TT_NUM - st.wordChars('.', '.'); - st.wordChars('-', '-'); st.ordinaryChar('='); - st.ordinaryChar(','); + st.ordinaryChar('<'); + st.ordinaryChar('>'); + st.ordinaryChar('/'); st.ordinaryChar('('); st.ordinaryChar(')'); -// int token; -// while ((token = st.nextToken()) != st.TT_EOF) { -// System.out.println("token=" + token + ", " + -// "nval=" + st.nval + ", " + -// "sval=" + st.sval); -// } - - st.nextToken(); - CQLNode root; - try { - root = parse_expression(); - } catch (CQLParseException ex) { - System.err.println("### Oops: " + ex); - return null; + if (false) { + // Lexical debug + int token; + while ((token = st.nextToken()) != st.TT_EOF) { + System.out.println("token=" + token + ", " + + "nval=" + st.nval + ", " + + "sval=" + st.sval); + } + System.exit(0); } - if (st.ttype != st.TT_EOF) { - System.err.println("### Extra bits: " + render(st)); - return null; - } + st.nextToken(); + System.err.println("*about to parse_query()"); + CQLNode root = parse_query(); + if (st.ttype != st.TT_EOF) + throw new CQLParseException("junk after end: " + render(st)); return root; } - private CQLNode parse_expression() + private CQLNode parse_query() throws CQLParseException, IOException { - CQLNode term = parse_term(); + System.err.println("*in parse_query()"); + CQLNode term = parse_term(); while (st.ttype == st.TT_WORD) { String op = st.sval.toLowerCase(); + System.err.println("*checking op '" + op + "'"); if (st.sval.equals("and")) { match(st.TT_WORD); CQLNode term2 = parse_term(); @@ -86,26 +80,35 @@ public class CQLParser { CQLNode term2 = parse_term(); term = new CQLNotNode(term, term2); } + // ### Need to handle "prox" } + System.err.println("*no more ops"); return term; } private CQLNode parse_term() throws CQLParseException, IOException { + System.err.println("*in parse_term()"); if (st.ttype == '(') { match('('); - CQLNode expr = parse_expression(); + CQLNode expr = parse_query(); match(')'); return expr; } + System.err.println("*not a parenthesised term"); + // ### Need to parse qualifier-relation pairs String word = st.sval; - return new CQLTermNode("x", "=", word); + match(st.ttype); + CQLTermNode node = new CQLTermNode("x", "=", word); + System.err.println("*made term node " + node); + return node; } private void match(int token) throws CQLParseException, IOException { + System.err.println("*in match(" + render(st, token, null) + ")"); if (st.ttype != token) throw new CQLParseException("expected " + render(st, token, null) + ", " + "got " + render(st)); @@ -125,11 +128,11 @@ public class CQLParser { } else if (token == st.TT_EOL) { return "EOL"; } else if (token == st.TT_NUMBER) { - return "number"; + return "number: " + st.nval; } else if (token == st.TT_WORD) { - return "word"; - } else if (token == '"' && token == '\'') { - return "string"; + return "word: \"" + st.sval + "\""; + } else if (token == '"' || token == '\'') { + return "string: \"" + st.sval + "\""; } return "'" + String.valueOf((char) token) + "'"; @@ -149,8 +152,9 @@ public class CQLParser { System.exit(1); } - byte[] bytes = new byte[1000]; + byte[] bytes = new byte[10000]; try { + // Read in the whole of standard input in one go int nbytes = System.in.read(bytes); } catch (java.io.IOException ex) { System.err.println("Can't read query: " + ex); @@ -161,10 +165,14 @@ public class CQLParser { CQLNode root; try { root = parser.parse(cql); + System.err.println("root='" + root + "'"); System.out.println(root.toXCQL(0)); + } catch (CQLParseException ex) { + System.err.println("Syntax error: " + ex); + System.exit(3); } catch (java.io.IOException ex) { System.err.println("Can't compile query: " + ex); - System.exit(3); + System.exit(4); } } } diff --git a/src/org/z3950/zing/cql/CQLTermNode.java b/src/org/z3950/zing/cql/CQLTermNode.java index 625aab0..64a0a84 100644 --- a/src/org/z3950/zing/cql/CQLTermNode.java +++ b/src/org/z3950/zing/cql/CQLTermNode.java @@ -1,4 +1,4 @@ -// $Id: CQLTermNode.java,v 1.2 2002-10-25 16:01:26 mike Exp $ +// $Id: CQLTermNode.java,v 1.3 2002-10-25 16:56:43 mike Exp $ package org.z3950.zing.cql; @@ -7,48 +7,49 @@ package org.z3950.zing.cql; * Represents a terminal node in a CQL parse-tree ... * ### * - * @version $Id: CQLTermNode.java,v 1.2 2002-10-25 16:01:26 mike Exp $ + * @version $Id: CQLTermNode.java,v 1.3 2002-10-25 16:56:43 mike Exp $ */ public class CQLTermNode extends CQLNode { private String qualifier; private String relation; - private String value; + private String term; - public CQLTermNode(String qualifier, String relation, String value) { + public CQLTermNode(String qualifier, String relation, String term) { this.qualifier = qualifier; this.relation = relation; - this.value = value; + this.term = term; } String toXCQL(int level) { return (indent(level) + "\n" + indent(level+1) + "" + qualifier + "\n" + indent(level+1) + "" + relation + "\n" + - indent(level+1) + "" + value + "\n" + + indent(level+1) + "" + term + "\n" + indent(level) + "\n"); } String toCQL() { - String res = value; + String quotedTerm = term; - if (res.indexOf('"') != -1) { + if (quotedTerm.indexOf('"') != -1) { // ### precede each '"' with a '/' } - if (res.indexOf('"') != -1 || - res.indexOf(' ') != -1 || - res.indexOf('\t') != -1 || - res.indexOf('=') != -1 || - res.indexOf('<') != -1 || - res.indexOf('>') != -1 || - res.indexOf('/') != -1 || - res.indexOf('(') != -1 || - res.indexOf(')') != -1) { - res = '"' + res + '"'; + // ### There must be a better way ... + if (quotedTerm.indexOf('"') != -1 || + quotedTerm.indexOf(' ') != -1 || + quotedTerm.indexOf('\t') != -1 || + quotedTerm.indexOf('=') != -1 || + quotedTerm.indexOf('<') != -1 || + quotedTerm.indexOf('>') != -1 || + quotedTerm.indexOf('/') != -1 || + quotedTerm.indexOf('(') != -1 || + quotedTerm.indexOf(')') != -1) { + quotedTerm = '"' + quotedTerm + '"'; } // ### The qualifier may need quoting. // ### We don't always need spaces around `relation'. - return qualifier + " " + relation + " " + value; + return qualifier + " " + relation + " " + quotedTerm; } } -- 1.7.10.4