1 // $Id: CQLLexer.java,v 1.1 2002-10-30 09:19:26 mike Exp $
3 package org.z3950.zing.cql;
4 import java.io.StreamTokenizer;
5 import java.io.StringReader;
8 // This is a trivial subclass for java.io.StreamTokenizer which knows
9 // about the multi-character tokens "<=", ">=" and "<>", and includes
10 // a render() method. Used only by CQLParser.
12 class CQLLexer extends StreamTokenizer {
// Flag associated with debug output.
// NOTE(review): no assignment to or test of DEBUG is visible in this view --
// confirm where it is set and consulted.
13 private static boolean DEBUG;
// Token-type codes (1000-1009) for the multi-character relations and the
// keywords named in the trailing comments below.
14 static int TT_LE = 1000; // The "<=" relation
15 static int TT_GE = 1001; // The ">=" relation
16 static int TT_NE = 1002; // The "<>" relation
17 static int TT_AND = 1003; // The "and" boolean
18 static int TT_OR = 1004; // The "or" boolean
19 static int TT_NOT = 1005; // The "not" boolean
20 static int TT_PROX = 1006; // The "prox" boolean
21 static int TT_ANY = 1007; // The "any" relation
22 static int TT_ALL = 1008; // The "all" relation
23 static int TT_EXACT = 1009; // The "exact" relation
25 // For halfDecentPushBack() and the code at the top of nextToken()
// Sentinel meaning "no token is currently saved".
26 private static int TT_UNDEFINED = -1000;
// Holds TT_UNDEFINED when nothing is saved; nextToken() tests for any other
// value on entry and resets this field to TT_UNDEFINED.
27 int saved_ttype = TT_UNDEFINED;
// Builds a lexer over the string `cql`, handing a StringReader on that
// string to the StreamTokenizer constructor.
// NOTE(review): no use of `lexdebug` is visible in this view; presumably it
// controls debug() output -- confirm.
31 CQLLexer(String cql, boolean lexdebug) {
32 super(new StringReader(cql));
// Register the apostrophe as a word character.
39 wordChars('\'', '\''); // prevent this from introducing strings
// Writes `str` to standard error, prefixed with "LEXDEBUG: ".
// NOTE(review): no guard on DEBUG is visible in this view -- confirm whether
// the output is conditional.
43 private static void debug(String str) {
45 System.err.println("LEXDEBUG: " + str);
48 // I don't honestly understand why we need this, but the
49 // documentation for java.io.StreamTokenizer.pushBack() is pretty
50 // vague about its semantics, and it seems to me that they could
51 // be summed up as "it doesn't work". This version has the very
52 // clear semantics "pretend I didn't call nextToken() just then".
// NOTE(review): the body of this method is not visible in this view. The
// comment on TT_UNDEFINED ties it to saved_ttype, which nextToken() checks
// on entry -- presumably this method records the current token there; confirm.
54 private void halfDecentPushBack() {
// Advances to the next token; the debug messages below show '<' followed by
// '=' or '>' being recognised as "<=" / "<>", and '>' followed by '=' as ">=".
// Declared with the same signature as StreamTokenizer.nextToken().
// NOTE(review): several statements (assignments to ttype, return statements,
// the push-back calls) are not visible in this view; the comments below
// describe only what the visible lines show.
60 public int nextToken() throws java.io.IOException {
// A token was saved earlier: this branch reports it via the debug message
// below rather than reading new input.
61 if (saved_ttype != TT_UNDEFINED) {
// Mark the saved slot as empty again.
65 saved_ttype = TT_UNDEFINED;
66 debug("using saved ttype=" + ttype + ", " +
67 "nval=" + nval + ", sval='" + sval + "'");
// Normal path: read one token through underlyingNextToken().
71 underlyingNextToken();
73 debug("token starts with '<' ...");
// Read one more token to see whether '<' begins "<=" or "<>".
74 underlyingNextToken();
76 debug("token continues with '=' - it's '<='");
78 } else if (ttype == '>') {
79 debug("token continues with '>' - it's '<>'");
// Neither '=' nor '>' followed: the extra token is reported as pushed back.
82 debug("next token is " + render() + " (pushed back)");
85 debug("AFTER: ttype is now " + ttype + " - " + render());
87 } else if (ttype == '>') {
88 debug("token starts with '>' ...");
// Read one more token to see whether '>' begins ">=".
89 underlyingNextToken();
91 debug("token continues with '=' - it's '>='");
// No '=' followed: the extra token is reported as pushed back.
94 debug("next token is " + render() + " (pushed back)");
97 debug("AFTER: ttype is now " + ttype + " - " + render());
// Final trace of the token state this call ends with.
101 debug("done nextToken(): ttype=" + ttype + ", " +
102 "nval=" + nval + ", " + "sval='" + sval + "'" +
103 " (" + render() + ")");
108 // It's important to do keyword recognition here at the lowest
109 // level, otherwise when one of these words follows "<" or ">"
110 // (which can be the beginning of multi-character tokens) it gets
111 // pushed back as a string, and its keywordiness is not recognised.
// Keyword recognition: when the current token is a word (TT_WORD), its text
// is compared case-insensitively against "and", "or", "not", "prox", "any",
// "all" and "exact".
// NOTE(review): the statement that reads the token and the body of each
// branch are not visible in this view -- presumably ttype is set to the
// matching TT_* code; confirm.
114 public int underlyingNextToken() throws java.io.IOException {
116 if (ttype == TT_WORD) {
117 if (sval.equalsIgnoreCase("and")) {
119 } else if (sval.equalsIgnoreCase("or")) {
121 } else if (sval.equalsIgnoreCase("not")) {
123 } else if (sval.equalsIgnoreCase("prox")) {
125 } else if (sval.equalsIgnoreCase("any")) {
127 } else if (sval.equalsIgnoreCase("all")) {
129 } else if (sval.equalsIgnoreCase("exact")) {
136 // Simpler interface for the usual case: current token with quoting
// Delegates to render(int, boolean) with the current ttype and
// quoteChars = true.
// NOTE(review): this method's signature line is not visible in this view.
138 return render(ttype, true);
// Returns a human-readable description of the token type `token`.
// Numbers, words and double-quoted strings are rendered from the lexer's
// current nval/sval fields, so for those types the text reflects the current
// token whatever `token` value was passed in.
// A code that reaches the final lines is rendered as the character with that
// code, wrapped in single quotes when `quoteChars` is true.
// NOTE(review): the return statements for TT_EOF and the TT_* codes are not
// visible in this view.
141 String render(int token, boolean quoteChars) {
142 if (token == TT_EOF) {
144 } else if (token == TT_NUMBER) {
145 return "number: " + nval;
146 } else if (token == TT_WORD) {
147 return "word: " + sval;
148 } else if (token == '"') {
149 return "string: \"" + sval + "\"";
150 } else if (token == TT_LE) {
152 } else if (token == TT_GE) {
154 } else if (token == TT_NE) {
156 } else if (token == TT_AND) {
158 } else if (token == TT_OR) {
160 } else if (token == TT_NOT) {
162 } else if (token == TT_PROX) {
164 } else if (token == TT_ANY) {
166 } else if (token == TT_ALL) {
168 } else if (token == TT_EXACT) {
// Treat the token code as a character and render it directly.
172 String res = String.valueOf((char) token);
173 if (quoteChars) res = "'" + res + "'";
177 public static void main(String[] args) throws Exception {
178 CQLLexer lexer = new CQLLexer(args[0], true);
181 while ((token = lexer.nextToken()) != TT_EOF) {
182 // Nothing to do: debug() statements render tokens for us