1 // $Id: CQLParser.java,v 1.8 2002-10-27 00:46:25 mike Exp $
3 package org.z3950.zing.cql;
4 import java.util.Properties;
5 import java.io.InputStream;
6 import java.io.IOException;
7 import java.io.StringReader;
8 import java.io.StreamTokenizer;
12 * Compiles a CQL string into a parse tree ...
15 * @version $Id: CQLParser.java,v 1.8 2002-10-27 00:46:25 mike Exp $
16 * @see <A href="http://zing.z3950.org/cql/index.html"
17 * >http://zing.z3950.org/cql/index.html</A>
19 public class CQLParser {
// Lexer over the query currently being parsed; parse() replaces it with a
// fresh CQLLexer on every call.
20 private CQLLexer lexer;
// Parser debug switch, off by default.
// NOTE(review): the line that tests this flag is not visible in this listing
// -- presumably it guards the output of debug(); confirm.
21 static private boolean PARSEDEBUG = false;
// Lexer debug switch, off by default; parse() hands it to the CQLLexer
// constructor.
22 static private boolean LEXDEBUG = false;
// Checked exception whose message describes a syntax error found while
// parsing a CQL query.
// NOTE(review): this is a private, non-static inner class, yet it appears in
// the throws clause of the public parse() method, so code outside CQLParser
// cannot name the type in a catch clause. Consider making it a public static
// nested class or a top-level class.
24 private class CQLParseException extends Exception {
// s is the human-readable description of the error.
25 CQLParseException(String s) { super(s); }
// Writes str to standard error, prefixed with "PARSEDEBUG: ".
// NOTE(review): any guard around the println (e.g. a PARSEDEBUG test) is not
// visible in this listing -- confirm before relying on it being silent.
28 static void debug(String str) {
30 System.err.println("PARSEDEBUG: " + str);
// Compiles the CQL query string cql into a tree of CQLNode objects.
// Throws CQLParseException when the query is malformed, including when
// tokens remain after a complete query has been parsed; IOException is
// declared because the parsing methods read tokens through the lexer.
// NOTE(review): the lines that prime the lexer and return the root node are
// not visible in this listing.
33 public CQLNode parse(String cql)
34 throws CQLParseException, IOException {
// A new lexer is built for every call, replacing any previous one.
35 lexer = new CQLLexer(cql, LEXDEBUG);
38 debug("about to parse_query()");
// "srw.serverChoice" and "=" are handed down as the qualifier and relation
// for the query's terms.
39 CQLNode root = parse_query("srw.serverChoice", "=");
// Anything other than end-of-input at this point is trailing junk.
40 if (lexer.ttype != lexer.TT_EOF)
41 throw new CQLParseException("junk after end: " + lexer.render());
// Parses one or more terms joined by boolean operators. The tree is built
// left to right: each recognised operator wraps the tree built so far and
// the next term in a new CQLAndNode, CQLOrNode or CQLNotNode.
// qualifier and relation are passed unchanged to every parse_term() call.
// Throws CQLParseException when a word in operator position is not a
// recognised boolean.
// NOTE(review): the token-advancing lines and the body of the "prox" branch
// are not visible in this listing.
46 private CQLNode parse_query(String qualifier, String relation)
47 throws CQLParseException, IOException {
48 debug("in parse_query()");
50 CQLNode term = parse_term(qualifier, relation);
51 while (lexer.ttype == lexer.TT_WORD) {
// Lower-case the operator once and compare against that, so that booleans
// are recognised in any letter case (AND, Or, nOt, ...).
52 String op = lexer.sval.toLowerCase();
53 debug("checking op '" + op + "'");
54 if (op.equals("and")) {
56 CQLNode term2 = parse_term(qualifier, relation);
57 term = new CQLAndNode(term, term2);
58 } else if (op.equals("or")) {
60 CQLNode term2 = parse_term(qualifier, relation);
61 term = new CQLOrNode(term, term2);
62 } else if (op.equals("not")) {
64 CQLNode term2 = parse_term(qualifier, relation);
65 term = new CQLNotNode(term, term2);
66 } else if (op.equals("prox")) {
69 throw new CQLParseException("unrecognised boolean: '" +
// Parses a single operand of a query: either a parenthesised sub-query or a
// simple term that ends up as a CQLTermNode.
// qualifier and relation are the values in force from the caller; the
// relation can be replaced locally by one read from the input.
// Throws CQLParseException when the current token can start neither form.
// NOTE(review): several lines of this method (token advancing, the
// declaration of word, the return statements) are not visible in this
// listing.
78 private CQLNode parse_term(String qualifier, String relation)
79 throws CQLParseException, IOException {
80 debug("in parse_term()");
84 if (lexer.ttype == '(') {
85 debug("parenthesised term");
// A parenthesised group is parsed as a complete query with the same
// qualifier and relation.
87 CQLNode expr = parse_query(qualifier, relation);
// Only a bare word or a double-quoted string may start a simple term.
90 } else if (lexer.ttype != lexer.TT_WORD && lexer.ttype != '"') {
91 throw new CQLParseException("expected qualifier or term, " +
92 "got " + lexer.render());
95 debug("non-parenthesised term");
// The relation is taken from the current token, rendered without quotes.
// NOTE(review): the condition under which this assignment runs is not
// visible here -- presumably when isRelation() is true; confirm.
102 relation = lexer.render(false);
104 debug("qualifier='" + qualifier + ", relation='" + relation + "'");
107 CQLTermNode node = new CQLTermNode(qualifier, relation, word);
108 debug("made term node " + node);
// Reports whether the lexer's current token is a relation symbol.
// The visible part of the test accepts '<' and '>'; the rest of the
// expression is cut off in this listing.
112 boolean isRelation() {
113 // ### Also need to handle <=, >=, <>
114 return (lexer.ttype == '<' ||
115 lexer.ttype == '>' ||
// Verifies that the lexer's current token type equals token and throws a
// CQLParseException naming the expected and the actual token otherwise.
// NOTE(review): the lines after the check are not visible in this listing
// -- presumably the lexer is advanced to the next token on success; confirm.
119 private void match(int token)
120 throws CQLParseException, IOException {
121 debug("in match(" + lexer.render(token, null, true) + ")");
122 if (lexer.ttype != token)
123 throw new CQLParseException("expected " +
124 lexer.render(token, null, true) +
125 ", " + "got " + lexer.render());
132 // e.g. echo '(au=Kernighan or au=Ritchie) and ti=Unix' |
133 // java org.z3950.zing.cql.CQLParser
136 // <boolean>and</boolean>
138 // <boolean>or</boolean>
141 // <relation>=</relation>
142 // <term>Kernighan</term>
146 // <relation>=</relation>
147 // <term>Ritchie</term>
152 // <relation>=</relation>
// Command-line test harness: reads a CQL query from standard input, parses
// it and prints the result of the root node's toXCQL(0) to standard output.
// Problems are reported on standard error.
157 public static void main (String[] args) {
// No command-line arguments are accepted.
// NOTE(review): in Java args[0] is the first user-supplied argument, not the
// program name, so this usage message merely echoes the stray argument.
158 if (args.length != 0) {
159 System.err.println("Usage: " + args[0]);
// Fixed-size input buffer: at most 10000 bytes of the query are read.
163 byte[] bytes = new byte[10000];
165 // Read in the whole of standard input in one go
// NOTE(review): a single InputStream.read() call may return fewer bytes than
// are eventually available, and nbytes is not used in the visible lines.
166 int nbytes = System.in.read(bytes);
167 } catch (java.io.IOException ex) {
168 System.err.println("Can't read query: " + ex.getMessage());
// NOTE(review): this converts the entire 10000-byte buffer rather than only
// the bytes read, and decodes with the platform default charset. The unread
// tail stays zero-filled; StreamTokenizer treats those characters as
// whitespace by default.
171 String cql = new String(bytes);
172 CQLParser parser = new CQLParser();
175 root = parser.parse(cql);
176 debug("root='" + root + "'");
177 System.out.println(root.toXCQL(0));
// Syntax errors in the query and I/O failures from the lexer are reported
// separately.
178 } catch (CQLParseException ex) {
179 System.err.println("Syntax error: " + ex.getMessage());
181 } catch (java.io.IOException ex) {
182 System.err.println("Can't compile query: " + ex.getMessage());
189 // This is a trivial subclass of java.io.StreamTokenizer which knows
190 // about the multi-character tokens "<=", ">=" and "<>", and includes
191 // a render() method. Used only by CQLParser.
193 class CQLLexer extends StreamTokenizer {
// Debug switch for token tracing, set by the constructor.
// NOTE(review): the field is static but is assigned from a constructor
// argument, so the most recently constructed lexer determines the value for
// every instance. An instance field looks like the intent.
194 private static boolean lexdebug;
// Builds a tokenizer over the query string cql and adjusts the
// StreamTokenizer syntax table for CQL. lexdebug is the debug switch to
// store.
196 CQLLexer(String cql, boolean lexdebug) {
197 super(new StringReader(cql));
// Each of these characters becomes an ordinary character, i.e. it is
// returned as a single-character token of its own. For '/' this also
// removes StreamTokenizer's default treatment of it as a comment starter.
198 this.ordinaryChar('=');
199 this.ordinaryChar('<');
200 this.ordinaryChar('>');
201 this.ordinaryChar('/');
202 this.ordinaryChar('(');
203 this.ordinaryChar(')');
204 this.wordChars('\'', '\''); // prevent this from introducing strings
// Writes the static lexdebug field through an instance reference.
205 this.lexdebug = lexdebug;
// Reads the next token via StreamTokenizer.nextToken() and can trace it.
// The trace line goes to standard output and shows the token code together
// with the current nval and sval.
// NOTE(review): the condition guarding the trace and the return statement
// are not visible in this listing -- presumably the trace is printed only
// when lexdebug is set and the token code is returned; confirm.
208 public int nextToken() throws java.io.IOException {
209 int token = super.nextToken();
211 System.out.println("LEXDEBUG: " +
212 "token=" + token + ", " +
213 "nval=" + this.nval + ", " +
214 "sval=" + this.sval);
// NOTE(review): the header of the enclosing method is not visible in this
// listing -- presumably this is the body of the zero-argument render()
// overload used by CQLParser. It renders the current token with
// single-character tokens wrapped in quotes.
220 return this.render(this.ttype, null, true);
// Renders the current token; quoteChars selects whether a single-character
// token is wrapped in single quotes.
223 String render(boolean quoteChars) {
224 return this.render(this.ttype, null, quoteChars);
// Produces a human-readable description of the token type token.
// Numbers render as "number: <nval>", words as "word: "<sval>"" and
// double-quoted strings as "string: "<sval>""; any other token code is
// rendered as its character, wrapped in single quotes when quoteChars is
// true.
// NOTE(review): str is not referenced in the visible lines, and the number,
// word and string branches print this lexer's current nval/sval whatever
// token was passed in, so rendering an *expected* token (as match() does)
// shows the current token's value.
// NOTE(review): the results returned for TT_EOF and TT_EOL and the final
// return statement are not visible in this listing.
227 String render(int token, String str, boolean quoteChars) {
230 if (token == this.TT_EOF) {
232 } else if (token == this.TT_EOL) {
234 } else if (token == this.TT_NUMBER) {
235 return "number: " + this.nval;
236 } else if (token == this.TT_WORD) {
237 return "word: \"" + this.sval + "\"";
238 } else if (token == '"') {
239 return "string: \"" + this.sval + "\"";
242 String res = String.valueOf((char) token);
243 if (quoteChars) res = "'" + res + "'";