-// $Id: CQLLexer.java,v 1.1 2002-10-30 09:19:26 mike Exp $
+// $Id: CQLLexer.java,v 1.13 2007-06-29 15:38:56 mike Exp $
package org.z3950.zing.cql;
import java.io.StreamTokenizer;
import java.io.StringReader;
+import java.util.Hashtable;
-// This is a trivial subclass for java.io.StreamTokenizer which knows
-// about the multi-character tokens "<=", ">=" and "<>", and includes
-// a render() method. Used only by CQLParser.
+// This is a semi-trivial subclass for java.io.StreamTokenizer that:
+// * Has a halfDecentPushBack() method that actually works
+// * Includes a render() method
+// * Knows about the multi-character tokens "<=", ">=" and "<>"
+// * Recognises a set of keywords as tokens in their own right
+// * Includes some primitive debugging-output facilities
+// It's used only by CQLParser.
//
class CQLLexer extends StreamTokenizer {
- private static boolean DEBUG;
- static int TT_LE = 1000; // The "<=" relation
- static int TT_GE = 1001; // The ">=" relation
- static int TT_NE = 1002; // The "<>" relation
- static int TT_AND = 1003; // The "and" boolean
- static int TT_OR = 1004; // The "or" boolean
- static int TT_NOT = 1005; // The "not" boolean
- static int TT_PROX = 1006; // The "prox" boolean
- static int TT_ANY = 1007; // The "any" relation
- static int TT_ALL = 1008; // The "all" relation
- static int TT_EXACT = 1009; // The "exact" relation
+ // New token-types beyond StreamTokenizer's own; declared without an access modifier (package-visible) and not final
+ static int TT_LE = 1000; // The "<=" relation
+ static int TT_GE = 1001; // The ">=" relation
+ static int TT_NE = 1002; // The "<>" relation
+ static int TT_EQEQ = 1003; // The "==" relation (new; takes the value TT_AND used to have)
+ static int TT_AND = 1004; // The "and" boolean (was 1003)
+ static int TT_OR = 1005; // The "or" boolean (was 1004)
+ static int TT_NOT = 1006; // The "not" boolean (was 1005)
+ static int TT_PROX = 1007; // The "prox" boolean (was 1006); TT_ANY, TT_ALL and TT_EXACT are no longer defined
+
+ // Support for keywords. It would be nice to compile this linear
+ // list into a Hashtable, but it's hard to store ints as hash
+ // values, and next to impossible to use them as hash keys. So
+ // we'll just scan the (very short) list every time we need to do
+ // a lookup.
+ private class Keyword { // Pairs a token-type code with the keyword spelling that maps to it; a non-static inner class
+ int token; // token-type code assigned to ttype when the keyword is recognised (e.g. TT_AND)
+ String keyword; // keyword spelling; compared with equalsIgnoreCase() in underlyingNextToken()
+ Keyword(int token, String keyword) { // stores both values unchanged; no validation
+ this.token = token;
+ this.keyword = keyword;
+ }
+ }
+ // Logically static, but Keyword is a non-static inner class, so it cannot be instantiated from a static initializer
+ private Keyword[] keywords = { // one array per lexer instance; scanned linearly by underlyingNextToken() and render()
+ new Keyword(TT_AND, "and"),
+ new Keyword(TT_OR, "or"),
+ new Keyword(TT_NOT, "not"),
+ new Keyword(TT_PROX, "prox"),
+ };
// For halfDecentPushBack() and the code at the top of nextToken()
private static int TT_UNDEFINED = -1000; // sentinel used as the initial value of saved_ttype
- int saved_ttype = TT_UNDEFINED;
- double saved_nval;
- String saved_sval;
+ private int saved_ttype = TT_UNDEFINED; // starts as TT_UNDEFINED; presumably set by halfDecentPushBack() (not shown here) - confirm
+ private double saved_nval; // presumably the nval belonging to saved_ttype - confirm
+ private String saved_sval; // presumably the sval belonging to saved_ttype - confirm
+
+ // Controls debugging output
+ private static boolean DEBUG; // static, yet assigned by the constructor: the most recently built lexer sets it for all instances
CQLLexer(String cql, boolean lexdebug) { // Builds a lexer over the query string cql; lexdebug is copied into the static DEBUG flag
super(new StringReader(cql));
+ wordChars('!', '?'); // ASCII-dependency! Marks '!' (0x21) through '?' (0x3F) as word characters
+ wordChars('[', '`'); // ASCII-dependency! Marks '[' (0x5B) through '`' (0x60) as word characters
+ quoteChar('"'); // '"' lies inside the '!'..'?' range above, so it is re-declared as the string delimiter afterwards
ordinaryChar('='); // '=', '<', '>', '(' and ')' also lie inside '!'..'?'; these calls must follow wordChars() to take effect
ordinaryChar('<');
ordinaryChar('>');
ordinaryChar('(');
ordinaryChar(')');
wordChars('\'', '\''); // prevent this from introducing strings ('\'' is also inside the '!'..'?' range)
+ parseNumbers(); // asks StreamTokenizer to parse numbers (digits, '.' and '-' per its documentation)
DEBUG = lexdebug; // writes the static field, so this setting applies to every CQLLexer instance
}
ttype = '>';
debug("AFTER: ttype is now " + ttype + " - " + render());
}
+ } else if (ttype == '=') {
+ debug("token starts with '=' ...");
+ underlyingNextToken();
+ if (ttype == '=') {
+ debug("token continues with '=' - it's '=='");
+ ttype = TT_EQEQ;
+ } else {
+ debug("next token is " + render() + " (pushed back)");
+ halfDecentPushBack();
+ ttype = '=';
+ debug("AFTER: ttype is now " + ttype + " - " + render());
+ }
}
debug("done nextToken(): ttype=" + ttype + ", " +
//
public int underlyingNextToken() throws java.io.IOException { // Reads one token via StreamTokenizer.nextToken() and maps keyword words to their TT_* codes
super.nextToken();
- if (ttype == TT_WORD) {
- if (sval.equalsIgnoreCase("and")) {
- ttype = TT_AND;
- } else if (sval.equalsIgnoreCase("or")) {
- ttype = TT_OR;
- } else if (sval.equalsIgnoreCase("not")) {
- ttype = TT_NOT;
- } else if (sval.equalsIgnoreCase("prox")) {
- ttype = TT_PROX;
- } else if (sval.equalsIgnoreCase("any")) {
- ttype = TT_ANY;
- } else if (sval.equalsIgnoreCase("all")) {
- ttype = TT_ALL;
- } else if (sval.equalsIgnoreCase("exact")) {
- ttype = TT_EXACT;
- }
- }
+ if (ttype == TT_WORD) // only word tokens are candidates for keyword mapping
+ for (int i = 0; i < keywords.length; i++)
+ if (sval.equalsIgnoreCase(keywords[i].keyword)) // case-insensitive comparison against each keyword spelling
+ ttype = keywords[i].token; // no break: the scan continues, which is harmless while the keyword spellings are distinct
+
return ttype; // the (possibly remapped) token type, which is also left in the inherited ttype field
}
if (token == TT_EOF) {
return "EOF";
} else if (token == TT_NUMBER) {
- return "number: " + nval;
+ if ((double) nval == (int) nval) {
+ return new Integer((int) nval).toString();
+ } else {
+ return new Double((double) nval).toString();
+ }
} else if (token == TT_WORD) {
return "word: " + sval;
} else if (token == '"') {
return ">=";
} else if (token == TT_NE) {
return "<>";
- } else if (token == TT_AND) {
- return "and";
- } else if (token == TT_OR) {
- return "or";
- } else if (token == TT_NOT) {
- return "not";
- } else if (token == TT_PROX) {
- return "prox";
- } else if (token == TT_ANY) {
- return "any";
- } else if (token == TT_ALL) {
- return "all";
- } else if (token == TT_EXACT) {
- return "exact";
+ } else if (token == TT_EQEQ) {
+ return "==";
}
+ // Check whether it's associated with one of the keywords
+ for (int i = 0; i < keywords.length; i++)
+ if (token == keywords[i].token)
+ return keywords[i].keyword;
+
+ // Otherwise it must be a single character, such as '(' or '/'.
String res = String.valueOf((char) token);
if (quoteChars) res = "'" + res + "'";
return res;
}
public static void main(String[] args) throws Exception {
- CQLLexer lexer = new CQLLexer(args[0], true);
- int token;
+ if (args.length > 1) {
+ System.err.println("Usage: CQLLexer [<CQL-query>]");
+ System.err.println("If unspecified, query is read from stdin");
+ System.exit(1);
+ }
+
+ String cql;
+ if (args.length == 1) {
+ cql = args[0];
+ } else {
+ byte[] bytes = new byte[10000];
+ try {
+ // Read in the whole of standard input in one go
+ int nbytes = System.in.read(bytes);
+ } catch (java.io.IOException ex) {
+ System.err.println("Can't read query: " + ex.getMessage());
+ System.exit(2);
+ }
+ cql = new String(bytes);
+ }
+ CQLLexer lexer = new CQLLexer(cql, true);
+ int token;
while ((token = lexer.nextToken()) != TT_EOF) {
// Nothing to do: debug() statements render tokens for us
}