From d48bccaed9308a737cdfa55ba555891d0ad85b87 Mon Sep 17 00:00:00 2001 From: Jakub Skoczen Date: Fri, 14 Mar 2014 19:11:02 +0100 Subject: [PATCH] Re-add CQLLexer#main --- src/main/java/org/z3950/zing/cql/CQLLexer.java | 216 +++++++++++++++++--- .../java/org/z3950/zing/cql/CQLLexerSimple.java | 173 ---------------- src/main/java/org/z3950/zing/cql/CQLParser.java | 58 +++--- src/main/java/org/z3950/zing/cql/CQLTokenizer.java | 40 ++++ 4 files changed, 258 insertions(+), 229 deletions(-) delete mode 100644 src/main/java/org/z3950/zing/cql/CQLLexerSimple.java create mode 100644 src/main/java/org/z3950/zing/cql/CQLTokenizer.java diff --git a/src/main/java/org/z3950/zing/cql/CQLLexer.java b/src/main/java/org/z3950/zing/cql/CQLLexer.java index 2247fd0..2c867ea 100644 --- a/src/main/java/org/z3950/zing/cql/CQLLexer.java +++ b/src/main/java/org/z3950/zing/cql/CQLLexer.java @@ -9,32 +9,194 @@ package org.z3950.zing.cql; * * @author jakub */ -public interface CQLLexer { - - public static final int TT_EOF = -1; - public static final int TT_WORD = -3; - public static final int TT_NOTHING = -4; - - public final static int TT_LE = 1000; // The "<=" relation - public final static int TT_GE = 1001; // The ">=" relation - public final static int TT_NE = 1002; // The "<>" relation - public final static int TT_EQEQ = 1003; // The "==" relation - public final static int TT_AND = 1004; // The "and" boolean - public final static int TT_OR = 1005; // The "or" boolean - public final static int TT_NOT = 1006; // The "not" boolean - public final static int TT_PROX = 1007; // The "prox" boolean - public final static int TT_SORTBY = 1008; // The "sortby" operator - - public void move(); - - public String value(); - - public int what(); - - public String render(); - - public String render(int what, boolean quote); - - public int pos(); +public class CQLLexer implements CQLTokenizer { + private String qs; + private int qi; + private int ql; + private int what = TT_NOTHING; + private String val; + private String lval; + private StringBuilder buf = new StringBuilder(); + + public CQLLexer(String cql, boolean debug) { + qs = cql; + ql = cql.length(); + } + + @Override + public void move() { + //eat whitespace + while (qi < ql && strchr(" \t\r\n", qs.charAt(qi))) + qi++; + //eof + if (qi == ql) { + what = TT_EOF; + return; + } + //current char + char c = qs.charAt(qi); + //separators + if (strchr("()/", c)) { + what = c; + qi++; + //comparitor + } else if (strchr("<>=", c)) { + what = c; + qi++; + //two-char comparitor + if (qi < ql) { + char d = qs.charAt(qi); + String comp = String.valueOf((char) c) + String.valueOf((char) d); + if (comp.equals("==")) { + what = TT_EQEQ; + qi++; + } + else if (comp.equals("<=")) { + what = TT_LE; + qi++; + } + else if (comp.equals(">=")) { + what = TT_GE; + qi++; + } + else if (comp.equals("<>")) { + what = TT_NE; + qi++; + } + } + //quoted string + } else if (strchr("\"", c)) { //no single-quotes + what = '"'; + //remember quote char + char mark = c; + qi++; + boolean escaped = false; + buf.setLength(0); //reset buffer + while (qi < ql) { + if (!escaped && qs.charAt(qi) == mark) //terminator + break; + if (escaped && strchr("*?^\\", qs.charAt(qi))) //no escaping for d-quote + buf.append("\\"); + if (!escaped && qs.charAt(qi) == '\\') { //escape-char + escaped = true; + qi++; + continue; + } + escaped = false; //reset escape + buf.append(qs.charAt(qi)); + qi++; + } + val = buf.toString(); + lval = val.toLowerCase(); + if (qi < ql) + qi++; + else //unterminated + what = TT_EOF; //notify error + //unquoted string + } else { + what = TT_WORD; + buf.setLength(0); //reset buffer + while (qi < ql + && !strchr("()/<>= \t\r\n", qs.charAt(qi))) { + buf.append(qs.charAt(qi)); + qi++; + } + val = buf.toString(); + lval = val.toLowerCase(); + if (lval.equals("or")) what = TT_OR; + else if (lval.equals("and")) what = TT_AND; + else if (lval.equals("not")) what = TT_NOT; + else if (lval.equals("prox")) what = TT_PROX; + else if (lval.equals("sortby")) what = TT_SORTBY; + } + } + + private boolean strchr(String s, char ch) { + return s.indexOf(ch) >= 0; + } + + @Override + public String value() { + return val; + } + + @Override + public int what() { + return what; + } + + @Override + public String render() { + return render(what, true); + } + + @Override + public String render(int token, boolean quoteChars) { + switch (token) { + case TT_EOF: + return "EOF"; + case TT_WORD: + return "word: " + val; + case '"': + return "string: \"" + val + "\""; + case TT_LE: + return "<="; + case TT_GE: + return ">="; + case TT_NE: + return "<>"; + case TT_EQEQ: + return "=="; + case TT_AND: + return "and"; + case TT_NOT: + return "not"; + case TT_OR: + return "or"; + case TT_PROX: + return "prox"; + case TT_SORTBY: + return "sortby"; + default: + //a single character, such as '(' or '/' or relation + String res = String.valueOf((char) token); + if (quoteChars) + res = "'" + res + "'"; + return res; + } + } + + @Override + public int pos() { + return qi; + } + public static void main(String[] args) throws Exception { + if (args.length > 1) { + System.err.println("Usage: CQLLexer []"); + System.err.println("If unspecified, query is read from stdin"); + System.exit(1); + } + + String cql; + if (args.length == 1) { + cql = args[0]; + } else { + byte[] bytes = new byte[10000]; + try { + // Read in the whole of standard input in one go + int nbytes = System.in.read(bytes); + } catch (java.io.IOException ex) { + System.err.println("Can't read query: " + ex.getMessage()); + System.exit(2); + } + cql = new String(bytes); + } + + CQLTokenizer lexer = new CQLLexer(cql, true); + while ((lexer.what()) != TT_EOF) { + lexer.move(); + System.out.println(lexer.render()); + } + } } diff --git a/src/main/java/org/z3950/zing/cql/CQLLexerSimple.java b/src/main/java/org/z3950/zing/cql/CQLLexerSimple.java deleted file mode 100644 index 80931cc..0000000 --- a/src/main/java/org/z3950/zing/cql/CQLLexerSimple.java +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright (c) 1995-2014, Index Datassss - * All rights reserved. - * See the file LICENSE for details. - */ -package org.z3950.zing.cql; - -/** - * - * @author jakub - */ -public class CQLLexerSimple implements CQLLexer { - private String qs; - private int qi; - private int ql; - private int what = TT_NOTHING; - private String val; - private String lval; - private StringBuilder buf = new StringBuilder(); - - public CQLLexerSimple(String cql, boolean debug) { - qs = cql; - ql = cql.length(); - } - - @Override - public void move() { - //eat whitespace - while (qi < ql && strchr(" \t\r\n", qs.charAt(qi))) - qi++; - //eof - if (qi == ql) { - what = TT_EOF; - return; - } - //current char - char c = qs.charAt(qi); - //separators - if (strchr("()/", c)) { - what = c; - qi++; - //comparitor - } else if (strchr("<>=", c)) { - what = c; - qi++; - //two-char comparitor - if (qi < ql) { - char d = qs.charAt(qi); - String comp = String.valueOf((char) c) + String.valueOf((char) d); - if (comp.equals("==")) { - what = TT_EQEQ; - qi++; - } - else if (comp.equals("<=")) { - what = TT_LE; - qi++; - } - else if (comp.equals(">=")) { - what = TT_GE; - qi++; - } - else if (comp.equals("<>")) { - what = TT_NE; - qi++; - } - } - //quoted string - } else if (strchr("\"", c)) { //no single-quotes - what = '"'; - //remember quote char - char mark = c; - qi++; - boolean escaped = false; - buf.setLength(0); //reset buffer - while (qi < ql) { - if (!escaped && qs.charAt(qi) == mark) //terminator - break; - if (escaped && strchr("*?^\\", qs.charAt(qi))) //no escaping for d-quote - buf.append("\\"); - if (!escaped && qs.charAt(qi) == '\\') { //escape-char - escaped = true; - qi++; - continue; - } - escaped = false; //reset escape - buf.append(qs.charAt(qi)); - qi++; - } - val = buf.toString(); - lval = val.toLowerCase(); - if (qi < ql) - qi++; - else //unterminated - what = TT_EOF; //notify error - //unquoted string - } else { - what = TT_WORD; - buf.setLength(0); //reset buffer - while (qi < ql - && !strchr("()/<>= \t\r\n", qs.charAt(qi))) { - buf.append(qs.charAt(qi)); - qi++; - } - val = buf.toString(); - lval = val.toLowerCase(); - if (lval.equals("or")) what = TT_OR; - else if (lval.equals("and")) what = TT_AND; - else if (lval.equals("not")) what = TT_NOT; - else if (lval.equals("prox")) what = TT_PROX; - else if (lval.equals("sortby")) what = TT_SORTBY; - } - } - - private boolean strchr(String s, char ch) { - return s.indexOf(ch) >= 0; - } - - @Override - public String value() { - return val; - } - - @Override - public int what() { - return what; - } - - @Override - public String render() { - return render(what, true); - } - - @Override - public String render(int token, boolean quoteChars) { - switch (token) { - case TT_EOF: - return "EOF"; - case TT_WORD: - return "word: " + val; - case '"': - return "string: \"" + val + "\""; - case TT_LE: - return "<="; - case TT_GE: - return ">="; - case TT_NE: - return "<>"; - case TT_EQEQ: - return "=="; - case TT_AND: - return "and"; - case TT_NOT: - return "not"; - case TT_OR: - return "or"; - case TT_PROX: - return "prox"; - case TT_SORTBY: - return "sortby"; - default: - //a single character, such as '(' or '/' or relation - String res = String.valueOf((char) token); - if (quoteChars) - res = "'" + res + "'"; - return res; - } - } - - @Override - public int pos() { - return qi; - } -} diff --git a/src/main/java/org/z3950/zing/cql/CQLParser.java b/src/main/java/org/z3950/zing/cql/CQLParser.java index c373f09..31bd7fb 100644 --- a/src/main/java/org/z3950/zing/cql/CQLParser.java +++ b/src/main/java/org/z3950/zing/cql/CQLParser.java @@ -20,7 +20,7 @@ import java.util.Set; * >http://zing.z3950.org/cql/index.html */ public class CQLParser { - private CQLLexer lexer; + private CQLTokenizer lexer; private final int compat; // When false, implement CQL 1.2 private final Set customRelations = new HashSet(); @@ -112,13 +112,13 @@ public class CQLParser { * tree representing the query. */ public CQLNode parse(String cql) throws CQLParseException, IOException { - lexer = new CQLLexerSimple(cql, LEXDEBUG); + lexer = new CQLLexer(cql, LEXDEBUG); lexer.move(); debug("about to parseQuery()"); CQLNode root = parseTopLevelPrefixes("cql.serverChoice", new CQLRelation(compat == V1POINT2 ? "=" : "scr")); - if (lexer.what() != CQLLexer.TT_EOF) + if (lexer.what() != CQLTokenizer.TT_EOF) throw new CQLParseException("junk after end: " + lexer.render(), lexer.pos()); @@ -135,12 +135,12 @@ public class CQLParser { CQLNode node = parseQuery(index, relation); if ((compat == V1POINT2 || compat == V1POINT1SORT) && - lexer.what() == CQLLexer.TT_SORTBY) { + lexer.what() == CQLTokenizer.TT_SORTBY) { match(lexer.what()); debug("sortspec"); CQLSortNode sortnode = new CQLSortNode(node); - while (lexer.what() != CQLLexer.TT_EOF) { + while (lexer.what() != CQLTokenizer.TT_EOF) { String sortindex = matchSymbol("sort index"); ModifierSet ms = gatherModifiers(sortindex); sortnode.addSortIndex(ms); @@ -161,21 +161,21 @@ public class CQLParser { debug("in parseQuery()"); CQLNode term = parseTerm(index, relation); - while (lexer.what() != CQLLexer.TT_EOF && + while (lexer.what() != CQLTokenizer.TT_EOF && lexer.what() != ')' && - lexer.what() != CQLLexer.TT_SORTBY) { - if (lexer.what() == CQLLexer.TT_AND || - lexer.what() == CQLLexer.TT_OR || - lexer.what() == CQLLexer.TT_NOT || - lexer.what() == CQLLexer.TT_PROX) { + lexer.what() != CQLTokenizer.TT_SORTBY) { + if (lexer.what() == CQLTokenizer.TT_AND || + lexer.what() == CQLTokenizer.TT_OR || + lexer.what() == CQLTokenizer.TT_NOT || + lexer.what() == CQLTokenizer.TT_PROX) { int type = lexer.what(); String val = lexer.value(); match(type); ModifierSet ms = gatherModifiers(val); CQLNode term2 = parseTerm(index, relation); - term = ((type == CQLLexer.TT_AND) ? new CQLAndNode(term, term2, ms) : - (type == CQLLexer.TT_OR) ? new CQLOrNode (term, term2, ms) : - (type == CQLLexer.TT_NOT) ? new CQLNotNode(term, term2, ms) : + term = ((type == CQLTokenizer.TT_AND) ? new CQLAndNode(term, term2, ms) : + (type == CQLTokenizer.TT_OR) ? new CQLOrNode (term, term2, ms) : + (type == CQLTokenizer.TT_NOT) ? new CQLNotNode(term, term2, ms) : new CQLProxNode(term, term2, ms)); } else { throw new CQLParseException("expected boolean, got " + @@ -194,7 +194,7 @@ public class CQLParser { ModifierSet ms = new ModifierSet(base); while (lexer.what() == '/') { match('/'); - if (lexer.what() != CQLLexer.TT_WORD) + if (lexer.what() != CQLTokenizer.TT_WORD) throw new CQLParseException("expected modifier, " + "got " + lexer.render(), lexer.pos()); @@ -233,16 +233,16 @@ public class CQLParser { debug("non-parenthesised term"); word = matchSymbol("index or term"); - while (lexer.what() == CQLLexer.TT_WORD && !isRelation()) { + while (lexer.what() == CQLTokenizer.TT_WORD && !isRelation()) { word = word + " " + lexer.value(); - match(CQLLexer.TT_WORD); + match(CQLTokenizer.TT_WORD); } if (!isRelation()) break; index = word; - String relstr = (lexer.what() == CQLLexer.TT_WORD ? + String relstr = (lexer.what() == CQLTokenizer.TT_WORD ? lexer.value() : lexer.render(lexer.what(), false)); relation = new CQLRelation(relstr); match(lexer.what()); @@ -280,7 +280,7 @@ public class CQLParser { private boolean isRelation() { debug("isRelation: checking what()=" + lexer.what() + " (" + lexer.render() + ")"); - if (lexer.what() == CQLLexer.TT_WORD && + if (lexer.what() == CQLTokenizer.TT_WORD && (lexer.value().indexOf('.') >= 0 || lexer.value().equals("any") || lexer.value().equals("all") || @@ -301,10 +301,10 @@ public class CQLParser { return (lexer.what() == '<' || lexer.what() == '>' || lexer.what() == '=' || - lexer.what() == CQLLexer.TT_LE || - lexer.what() == CQLLexer.TT_GE || - lexer.what() == CQLLexer.TT_NE || - lexer.what() == CQLLexer.TT_EQEQ); + lexer.what() == CQLTokenizer.TT_LE || + lexer.what() == CQLTokenizer.TT_GE || + lexer.what() == CQLTokenizer.TT_NE || + lexer.what() == CQLTokenizer.TT_EQEQ); } private void match(int token) @@ -323,7 +323,7 @@ public class CQLParser { throws CQLParseException, IOException { debug("in matchSymbol()"); - if (lexer.what() == CQLLexer.TT_WORD || + if (lexer.what() == CQLTokenizer.TT_WORD || lexer.what() == '"' || // The following is a complete list of keywords. Because // they're listed here, they can be used unquoted as @@ -331,11 +331,11 @@ public class CQLParser { // ### Instead, we should ask the lexer whether what we // have is a keyword, and let the knowledge reside there. (allowKeywordTerms && - lexer.what() == CQLLexer.TT_AND || - lexer.what() == CQLLexer.TT_OR || - lexer.what() == CQLLexer.TT_NOT || - lexer.what() == CQLLexer.TT_PROX || - lexer.what() == CQLLexer.TT_SORTBY)) { + lexer.what() == CQLTokenizer.TT_AND || + lexer.what() == CQLTokenizer.TT_OR || + lexer.what() == CQLTokenizer.TT_NOT || + lexer.what() == CQLTokenizer.TT_PROX || + lexer.what() == CQLTokenizer.TT_SORTBY)) { String symbol = lexer.value(); match(lexer.what()); return symbol; diff --git a/src/main/java/org/z3950/zing/cql/CQLTokenizer.java b/src/main/java/org/z3950/zing/cql/CQLTokenizer.java new file mode 100644 index 0000000..4ab0702 --- /dev/null +++ b/src/main/java/org/z3950/zing/cql/CQLTokenizer.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 1995-2014, Index Datassss + * All rights reserved. + * See the file LICENSE for details. + */ +package org.z3950.zing.cql; + +/** + * + * @author jakub + */ +public interface CQLTokenizer { + + public static final int TT_EOF = -1; + public static final int TT_WORD = -3; + public static final int TT_NOTHING = -4; + + public final static int TT_LE = 1000; // The "<=" relation + public final static int TT_GE = 1001; // The ">=" relation + public final static int TT_NE = 1002; // The "<>" relation + public final static int TT_EQEQ = 1003; // The "==" relation + public final static int TT_AND = 1004; // The "and" boolean + public final static int TT_OR = 1005; // The "or" boolean + public final static int TT_NOT = 1006; // The "not" boolean + public final static int TT_PROX = 1007; // The "prox" boolean + public final static int TT_SORTBY = 1008; // The "sortby" operator + + public void move(); + + public String value(); + + public int what(); + + public String render(); + + public String render(int what, boolean quote); + + public int pos(); + +} -- 1.7.10.4