X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Forg%2Fz3950%2Fzing%2Fcql%2FCQLParser.java;h=0aded47d1785d238da77fc5658171bd1fbd67594;hb=f8154c71944186a9b64ddb782082a2026c5a912f;hp=8c5d871be08b19b1f334c9e597d84bba85da110b;hpb=ac8fb2ba7ecad42eb920c65b7056e3a3e677275c;p=cql-java-moved-to-github.git diff --git a/src/org/z3950/zing/cql/CQLParser.java b/src/org/z3950/zing/cql/CQLParser.java index 8c5d871..0aded47 100644 --- a/src/org/z3950/zing/cql/CQLParser.java +++ b/src/org/z3950/zing/cql/CQLParser.java @@ -1,32 +1,24 @@ -// $Id: CQLParser.java,v 1.9 2002-10-29 10:15:58 mike Exp $ +// $Id: CQLParser.java,v 1.11 2002-10-31 22:22:01 mike Exp $ package org.z3950.zing.cql; -import java.util.Properties; -import java.io.InputStream; import java.io.IOException; -import java.io.StringReader; -import java.io.StreamTokenizer; /** - * Compiles a CQL string into a parse tree ... - * ### + * Compiles a CQL string into a parse tree. + * ## * - * @version $Id: CQLParser.java,v 1.9 2002-10-29 10:15:58 mike Exp $ + * @version $Id: CQLParser.java,v 1.11 2002-10-31 22:22:01 mike Exp $ * @see http://zing.z3950.org/cql/index.html */ public class CQLParser { private CQLLexer lexer; - static private boolean PARSEDEBUG = false; - static private boolean LEXDEBUG = true; + static private boolean DEBUG = false; + static private boolean LEXDEBUG = false; - private class CQLParseException extends Exception { - CQLParseException(String s) { super(s); } - } - - static void debug(String str) { - if (PARSEDEBUG) + private static void debug(String str) { + if (DEBUG) System.err.println("PARSEDEBUG: " + str); } @@ -36,38 +28,42 @@ public class CQLParser { lexer.nextToken(); debug("about to parse_query()"); - CQLNode root = parse_query("srw.serverChoice", "="); + CQLNode root = parse_query("srw.serverChoice", new CQLRelation("=")); if (lexer.ttype != lexer.TT_EOF) throw new CQLParseException("junk after end: " + lexer.render()); return root; } - private CQLNode parse_query(String qualifier, String relation) + private CQLNode parse_query(String qualifier, CQLRelation relation) throws CQLParseException, IOException { debug("in parse_query()"); CQLNode term = parse_term(qualifier, relation); - while (lexer.ttype == lexer.TT_WORD) { - String op = lexer.sval.toLowerCase(); - debug("checking op '" + op + "'"); - if (lexer.sval.equals("and")) { - match(lexer.TT_WORD); + while (lexer.ttype != lexer.TT_EOF && + lexer.ttype != ')') { + if (lexer.ttype == lexer.TT_AND) { + match(lexer.TT_AND); CQLNode term2 = parse_term(qualifier, relation); term = new CQLAndNode(term, term2); - } else if (lexer.sval.equals("or")) { - match(lexer.TT_WORD); + } else if (lexer.ttype == lexer.TT_OR) { + match(lexer.TT_OR); CQLNode term2 = parse_term(qualifier, relation); term = new CQLOrNode(term, term2); - } else if (lexer.sval.equals("not")) { - match(lexer.TT_WORD); + } else if (lexer.ttype == lexer.TT_NOT) { + match(lexer.TT_NOT); CQLNode term2 = parse_term(qualifier, relation); term = new CQLNotNode(term, term2); - } else if (lexer.sval.equals("prox")) { - // ### Handle "prox" + } else if (lexer.ttype == lexer.TT_PROX) { + match(lexer.TT_PROX); + CQLProxNode proxnode = new CQLProxNode(term); + gatherProxParameters(proxnode); + CQLNode term2 = parse_term(qualifier, relation); + proxnode.addSecondSubterm(term2); + term = (CQLNode) proxnode; } else { - throw new CQLParseException("unrecognised boolean: '" + - lexer.sval + "'"); + throw new CQLParseException("expected boolean, got " + + lexer.render()); } } @@ -75,7 +71,7 @@ public class CQLParser { return term; } - private CQLNode parse_term(String qualifier, String relation) + private CQLNode parse_term(String qualifier, CQLRelation relation) throws CQLParseException, IOException { debug("in parse_term()"); @@ -95,23 +91,106 @@ public class CQLParser { debug("non-parenthesised term"); word = lexer.sval; match(lexer.ttype); - if (!isRelation()) + if (!isBaseRelation()) break; qualifier = word; - relation = lexer.render(false); - debug("got relation '" + relation + "'"); + relation = new CQLRelation(lexer.render(lexer.ttype, false)); match(lexer.ttype); - debug("qualifier='" + qualifier + ", relation='" + relation + "'"); + + while (lexer.ttype == '/') { + match('/'); + // ### could insist on known modifiers only + if (lexer.ttype != lexer.TT_WORD) + throw new CQLParseException("expected relation modifier, " + + "got " + lexer.render()); + relation.addModifier(lexer.sval); + match(lexer.TT_WORD); + } + + debug("qualifier='" + qualifier + ", " + + "relation='" + relation.toCQL() + "'"); } CQLTermNode node = new CQLTermNode(qualifier, relation, word); - debug("made term node " + node); + debug("made term node " + node.toCQL()); return node; } - boolean isRelation() { - // ### Handle any, all and exact + private void gatherProxParameters(CQLProxNode node) + throws CQLParseException, IOException { + for (int i = 0; i < 4; i++) { + if (lexer.ttype != '/') + return; // end of proximity parameters + + match('/'); + if (lexer.ttype != '/') { + // not an omitted default + switch (i) { + // Assumes order is: relation/distance/unit/ordering + case 0: gatherProxRelation(node); break; + case 1: gatherProxDistance(node); break; + case 2: gatherProxUnit(node); break; + case 3: gatherProxOrdering(node); break; + } + } + } + } + + private void gatherProxRelation(CQLProxNode node) + throws CQLParseException, IOException { + if (!isProxRelation()) + throw new CQLParseException("expected proximity relation, got " + + lexer.render()); + node.addModifier("relation", lexer.render(lexer.ttype, false)); + match(lexer.ttype); + debug("gPR matched " + lexer.render(lexer.ttype, false)); + } + + private void gatherProxDistance(CQLProxNode node) + throws CQLParseException, IOException { + if (lexer.ttype != lexer.TT_NUMBER) + throw new CQLParseException("expected proximity distance, got " + + lexer.render()); + node.addModifier("distance", lexer.render(lexer.ttype, false)); + match(lexer.ttype); + debug("gPD matched " + lexer.render(lexer.ttype, false)); + } + + private void gatherProxUnit(CQLProxNode node) + throws CQLParseException, IOException { + if (lexer.ttype != lexer.TT_pWORD && + lexer.ttype != lexer.TT_SENTENCE && + lexer.ttype != lexer.TT_PARAGRAPH && + lexer.ttype != lexer.TT_ELEMENT) + throw new CQLParseException("expected proximity unit, got " + + lexer.render()); + node.addModifier("unit", lexer.render()); + match(lexer.ttype); + } + + private void gatherProxOrdering(CQLProxNode node) + throws CQLParseException, IOException { + if (lexer.ttype != lexer.TT_ORDERED && + lexer.ttype != lexer.TT_UNORDERED) + throw new CQLParseException("expected proximity ordering, got " + + lexer.render()); + node.addModifier("ordering", lexer.render()); + match(lexer.ttype); + } + + boolean isBaseRelation() { + debug("isBaseRelation: checking ttype=" + lexer.ttype + + " (" + lexer.render() + ")"); + return (isProxRelation() || + lexer.ttype == lexer.TT_ANY || + lexer.ttype == lexer.TT_ALL || + lexer.ttype == lexer.TT_EXACT); + } + + boolean isProxRelation() { + debug("isProxRelation: checking ttype=" + lexer.ttype + + " (" + lexer.render() + ")"); return (lexer.ttype == '<' || lexer.ttype == '>' || lexer.ttype == '=' || @@ -122,12 +201,15 @@ public class CQLParser { private void match(int token) throws CQLParseException, IOException { - debug("in match(" + lexer.render(token, null, true) + ")"); + debug("in match(" + lexer.render(token, true) + ")"); if (lexer.ttype != token) throw new CQLParseException("expected " + - lexer.render(token, null, true) + + lexer.render(token, true) + ", " + "got " + lexer.render()); - lexer.nextToken(); + int tmp = lexer.nextToken(); + debug("match() got token=" + lexer.ttype + ", " + + "nval=" + lexer.nval + ", sval='" + lexer.sval + "'" + + " (tmp=" + tmp + ")"); } @@ -159,26 +241,33 @@ public class CQLParser { // // public static void main (String[] args) { - if (args.length != 0) { - System.err.println("Usage: " + args[0]); + if (args.length > 1) { + System.err.println("Usage: CQLParser []"); + System.err.println("If unspecified, query is read from stdin"); System.exit(1); } - byte[] bytes = new byte[10000]; - try { - // Read in the whole of standard input in one go - int nbytes = System.in.read(bytes); - } catch (java.io.IOException ex) { - System.err.println("Can't read query: " + ex.getMessage()); - System.exit(2); + String cql; + if (args.length == 1) { + cql = args[0]; + } else { + byte[] bytes = new byte[10000]; + try { + // Read in the whole of standard input in one go + int nbytes = System.in.read(bytes); + } catch (java.io.IOException ex) { + System.err.println("Can't read query: " + ex.getMessage()); + System.exit(2); + } + cql = new String(bytes); } - String cql = new String(bytes); + CQLParser parser = new CQLParser(); CQLNode root; try { root = parser.parse(cql); debug("root='" + root + "'"); - System.out.println(root.toXCQL(0)); + System.out.println(root.toCQL()); } catch (CQLParseException ex) { System.err.println("Syntax error: " + ex.getMessage()); System.exit(3); @@ -188,100 +277,3 @@ public class CQLParser { } } } - - -// This is a trivial subclass for java.io.StreamTokenizer which knows -// about the multi-character tokens "<=", ">=" and "<>", and included -// a render() method. Used only by CQLParser. -// -class CQLLexer extends StreamTokenizer { - private static boolean LEXDEBUG; - static int TT_LE = 1000; // The token "<=" - static int TT_GE = 1001; // The token ">=" - static int TT_NE = 1002; // The token "<>" - - static void debug(String str) { - if (LEXDEBUG) - System.err.println("LEXDEBUG: " + str); - } - - CQLLexer(String cql, boolean lexdebug) { - super(new StringReader(cql)); - this.ordinaryChar('='); - this.ordinaryChar('<'); - this.ordinaryChar('>'); - this.ordinaryChar('/'); - this.ordinaryChar('('); - this.ordinaryChar(')'); - this.wordChars('\'', '\''); // prevent this from introducing strings - this.LEXDEBUG = lexdebug; - } - - public int nextToken() throws java.io.IOException { - int token = super.nextToken(); - - if (token == '<') { - debug("token starts with '<' ..."); - int t2 = super.nextToken(); - if (t2 == '=') { - debug("token continues with '=' - it's '<='"); - this.ttype = token = TT_LE; - } else if (t2 == '>') { - debug("token continues with '>' - it's '<>'"); - this.ttype = token = TT_NE; - } else { - debug("next token is " + token + " (pushed back)"); - //this.pushBack(); - } - } else if (token == '>') { - debug("token starts with '>' ..."); - int t2 = super.nextToken(); - if (t2 == '=') { - debug("token continues with '=' - it's '>='"); - this.ttype = token = TT_GE; - } else { - debug("next token is " + token + " (pushed back)"); - //this.pushBack(); - } - } - - debug("token=" + token + ", " + - "nval=" + this.nval + ", " + "sval=" + this.sval); - - return token; - } - - String render() { - return this.render(this.ttype, null, true); - } - - String render(boolean quoteChars) { - return this.render(this.ttype, null, quoteChars); - } - - String render(int token, String str, boolean quoteChars) { - String ret; - - if (token == this.TT_EOF) { - return "EOF"; - } else if (token == this.TT_EOL) { - return "EOL"; - } else if (token == this.TT_NUMBER) { - return "number: " + this.nval; - } else if (token == this.TT_WORD) { - return "word: " + this.sval; - } else if (token == '"') { - return "string: \"" + this.sval + "\""; - } else if (token == TT_LE) { - return "<="; - } else if (token == TT_GE) { - return ">="; - } else if (token == TT_NE) { - return "<>"; - } - - String res = String.valueOf((char) token); - if (quoteChars) res = "'" + res + "'"; - return res; - } -}