src/org/z3950/zing/cql/CQLLexer.java

   1 // $Id: CQLLexer.java,v 1.1 2002-10-30 09:19:26 mike Exp $
   2
   3 package org.z3950.zing.cql;
   4 import java.io.StreamTokenizer;
   5 import java.io.StringReader;
   6
   7
   8 // This is a trivial subclass for java.io.StreamTokenizer which knows
   9 // about the multi-character tokens "<=", ">=" and "<>", and includes
  10 // a render() method.  Used only by CQLParser.
  11 //
  12 class CQLLexer extends StreamTokenizer {
  13     private static boolean DEBUG;
  14     static int TT_LE    = 1000; // The "<=" relation
  15     static int TT_GE    = 1001; // The ">=" relation
  16     static int TT_NE    = 1002; // The "<>" relation
  17     static int TT_AND   = 1003; // The "and" boolean
  18     static int TT_OR    = 1004; // The "or" boolean
  19     static int TT_NOT   = 1005; // The "not" boolean
  20     static int TT_PROX  = 1006; // The "prox" boolean
  21     static int TT_ANY   = 1007; // The "any" relation
  22     static int TT_ALL   = 1008; // The "all" relation
  23     static int TT_EXACT = 1009; // The "exact" relation
  24
  25     // For halfDecentPushBack() and the code at the top of nextToken()
  26     private static int TT_UNDEFINED = -1000;
  27     int saved_ttype = TT_UNDEFINED;
  28     double saved_nval;
  29     String saved_sval;
  30
  31     CQLLexer(String cql, boolean lexdebug) {
  32         super(new StringReader(cql));
  33         ordinaryChar('=');
  34         ordinaryChar('<');
  35         ordinaryChar('>');
  36         ordinaryChar('/');
  37         ordinaryChar('(');
  38         ordinaryChar(')');
  39         wordChars('\'', '\''); // prevent this from introducing strings
  40         DEBUG = lexdebug;
  41     }
  42
  43     private static void debug(String str) {
  44         if (DEBUG)
  45             System.err.println("LEXDEBUG: " + str);
  46     }
  47
  48     // I don't honestly understand why we need this, but the
  49     // documentation for java.io.StreamTokenizer.pushBack() is pretty
  50     // vague about its semantics, and it seems to me that they could
  51     // be summed up as "it doesn't work".  This version has the very
  52     // clear semantics "pretend I didn't call nextToken() just then".
  53     //
  54     private void halfDecentPushBack() {
  55         saved_ttype = ttype;
  56         saved_nval = nval;
  57         saved_sval = sval;
  58     }
  59
  60     public int nextToken() throws java.io.IOException {
  61         if (saved_ttype != TT_UNDEFINED) {
  62             ttype = saved_ttype;
  63             nval = saved_nval;
  64             sval = saved_sval;
  65             saved_ttype = TT_UNDEFINED;
  66             debug("using saved ttype=" + ttype + ", " +
  67                   "nval=" + nval + ", sval='" + sval + "'");
  68             return ttype;
  69         }
  70
  71         underlyingNextToken();
  72         if (ttype == '<') {
  73             debug("token starts with '<' ...");
  74             underlyingNextToken();
  75             if (ttype == '=') {
  76                 debug("token continues with '=' - it's '<='");
  77                 ttype = TT_LE;
  78             } else if (ttype == '>') {
  79                 debug("token continues with '>' - it's '<>'");
  80                 ttype = TT_NE;
  81             } else {
  82                 debug("next token is " + render() + " (pushed back)");
  83                 halfDecentPushBack();
  84                 ttype = '<';
  85                 debug("AFTER: ttype is now " + ttype + " - " + render());
  86             }
  87         } else if (ttype == '>') {
  88             debug("token starts with '>' ...");
  89             underlyingNextToken();
  90             if (ttype == '=') {
  91                 debug("token continues with '=' - it's '>='");
  92                 ttype = TT_GE;
  93             } else {
  94                 debug("next token is " + render() + " (pushed back)");
  95                 halfDecentPushBack();
  96                 ttype = '>';
  97                 debug("AFTER: ttype is now " + ttype + " - " + render());
  98             }
  99         }
 100
 101         debug("done nextToken(): ttype=" + ttype + ", " +
 102               "nval=" + nval + ", " + "sval='" + sval + "'" +
 103               " (" + render() + ")");
 104
 105         return ttype;
 106     }
 107
 108     // It's important to do keyword recognition here at the lowest
 109     // level, otherwise when one of these words follows "<" or ">"
 110     // (which can be the beginning of multi-character tokens) it gets
 111     // pushed back as a string, and its keywordiness is not
 112     // recognised.
 113     //
 114     public int underlyingNextToken() throws java.io.IOException {
 115         super.nextToken();
 116         if (ttype == TT_WORD) {
 117             if (sval.equalsIgnoreCase("and")) {
 118                 ttype = TT_AND;
 119             } else if (sval.equalsIgnoreCase("or")) {
 120                 ttype = TT_OR;
 121             } else if (sval.equalsIgnoreCase("not")) {
 122                 ttype = TT_NOT;
 123             } else if (sval.equalsIgnoreCase("prox")) {
 124                 ttype = TT_PROX;
 125             } else if (sval.equalsIgnoreCase("any")) {
 126                 ttype = TT_ANY;
 127             } else if (sval.equalsIgnoreCase("all")) {
 128                 ttype = TT_ALL;
 129             } else if (sval.equalsIgnoreCase("exact")) {
 130                 ttype = TT_EXACT;
 131             }
 132         }
 133         return ttype;
 134     }
 135
 136     // Simpler interface for the usual case: current token with quoting
 137     String render() {
 138         return render(ttype, true);
 139     }
 140
 141     String render(int token, boolean quoteChars) {
 142         if (token == TT_EOF) {
 143             return "EOF";
 144         } else if (token == TT_NUMBER) {
 145             return "number: " + nval;
 146         } else if (token == TT_WORD) {
 147             return "word: " + sval;
 148         } else if (token == '"') {
 149             return "string: \"" + sval + "\"";
 150         } else if (token == TT_LE) {
 151             return "<=";
 152         } else if (token == TT_GE) {
 153             return ">=";
 154         } else if (token == TT_NE) {
 155             return "<>";
 156         } else if (token == TT_AND) {
 157             return "and";
 158         } else if (token == TT_OR) {
 159             return "or";
 160         } else if (token == TT_NOT) {
 161             return "not";
 162         } else if (token == TT_PROX) {
 163             return "prox";
 164         } else if (token == TT_ANY) {
 165             return "any";
 166         } else if (token == TT_ALL) {
 167             return "all";
 168         } else if (token == TT_EXACT) {
 169             return "exact";
 170         }
 171
 172         String res = String.valueOf((char) token);
 173         if (quoteChars) res = "'" + res + "'";
 174         return res;
 175     }
 176
 177     public static void main(String[] args) throws Exception {
 178         CQLLexer lexer = new CQLLexer(args[0], true);
 179         int token;
 180
 181         while ((token = lexer.nextToken()) != TT_EOF) {
 182             // Nothing to do: debug() statements render tokens for us
 183         }
 184     }
 185 }