-// $Id: CQLParser.java,v 1.39 2007-08-06 15:54:48 mike Exp $
package org.z3950.zing.cql;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
/**
* Compiles CQL strings into parse trees of CQLNode subtypes.
*
- * @version $Id: CQLParser.java,v 1.39 2007-08-06 15:54:48 mike Exp $
* @see <A href="http://zing.z3950.org/cql/index.html"
* >http://zing.z3950.org/cql/index.html</A>
*/
public class CQLParser {
private CQLLexer lexer;
- private int compat; // When false, implement CQL 1.2
+ private PositionAwareReader par; //active reader with position
+ private final int compat; // When false, implement CQL 1.2
+ private final Set<String> customRelations = new HashSet<String>();
+
public static final int V1POINT1 = 12368;
public static final int V1POINT2 = 12369;
public static final int V1POINT1SORT = 12370;
+ public final boolean allowKeywordTerms;
static private boolean DEBUG = false;
static private boolean LEXDEBUG = false;
*/
public CQLParser(int compat) {
this.compat = compat;
+ this.allowKeywordTerms = true; // default: registered keywords may be used as unquoted terms
}
-
+
+ /**
+ * Creates a parser for the given CQL version with explicit control over
+ * whether keywords may be used as terms.
+ * <P>
+ * The official CQL grammar allows registered keywords like 'and/or/not/sortby/prox'
+ * to be used unquoted in terms. This constructor allows creating an instance
+ * of a parser that prohibits this behavior while sacrificing compatibility.
+ * @param compat CQL version compatibility: one of {@link #V1POINT1},
+ * {@link #V1POINT2} or {@link #V1POINT1SORT}
+ * @param allowKeywordTerms when false registered keywords are disallowed in unquoted terms
+ */
+ public CQLParser(int compat, boolean allowKeywordTerms) {
+ this.compat = compat; // honour the requested version instead of forcing V1POINT2
+ this.allowKeywordTerms = allowKeywordTerms;
+ }
+
/**
* The new parser implements CQL 1.2
*/
public CQLParser() {
this.compat = V1POINT2;
+ this.allowKeywordTerms = true; // default: registered keywords may be used as unquoted terms
}
private static void debug(String str) {
}
/**
+ * Registers a custom relation in this parser. Note that when a custom relation
+ * is registered the parser is no longer strictly compliant with the chosen spec.
+ * @param relation name of the relation to accept in addition to the built-in ones
+ * @return true if the custom relation had not been registered already
+ */
+ public boolean registerCustomRelation(String relation) {
+ return customRelations.add(relation);
+ }
+
+ /**
+ * Unregisters a previously registered custom relation in this instance of the parser.
+ * @param relation name of the custom relation to remove
+ * @return true if the relation had been previously registered
+ */
+ public boolean unregisterCustomRelation(String relation) {
+ return customRelations.remove(relation);
+ }
+
+ /**
* Compiles a CQL query.
* <P>
* The resulting parse tree may be further processed by hand (see
* tree representing the query. */
public CQLNode parse(Reader cql)
throws CQLParseException, IOException {
- lexer = new CQLLexer(cql, LEXDEBUG);
+ par = new PositionAwareReader(cql); // wrap the input so parse errors can report par.getPosition()
+ lexer = new CQLLexer(par, LEXDEBUG);
lexer.nextToken();
debug("about to parseQuery()");
CQLNode root = parseTopLevelPrefixes("cql.serverChoice",
new CQLRelation(compat == V1POINT2 ? "=" : "scr"));
if (lexer.ttype != CQLLexer.TT_EOF)
- throw new CQLParseException("junk after end: " + lexer.render());
+ throw new CQLParseException("junk after end: " + lexer.render(),
+ par.getPosition()); // position as reported by the reader wrapper when the leftover token was seen
return root;
}
}
if (sortnode.keys.size() == 0) {
- throw new CQLParseException("no sort keys");
+ throw new CQLParseException("no sort keys", par.getPosition());
}
node = sortnode;
new CQLProxNode(term, term2, ms));
} else {
throw new CQLParseException("expected boolean, got " +
- lexer.render());
+ lexer.render(), par.getPosition());
}
}
match('/');
if (lexer.ttype != CQLLexer.TT_WORD)
throw new CQLParseException("expected modifier, "
- + "got " + lexer.render());
+ + "got " + lexer.render(),
+ par.getPosition());
String type = lexer.sval.toLowerCase();
match(lexer.ttype);
if (!isSymbolicRelation()) {
lexer.sval.equals("all") ||
lexer.sval.equals("within") ||
lexer.sval.equals("encloses") ||
- lexer.sval.equals("exact") ||
+ (lexer.sval.equals("exact") && compat != V1POINT2) ||
(lexer.sval.equals("scr") && compat != V1POINT2) ||
- (lexer.sval.equals("adj") && compat == V1POINT2)))
+ (lexer.sval.equals("adj") && compat == V1POINT2) ||
+ customRelations.contains(lexer.sval)))
return true;
return isSymbolicRelation();
if (lexer.ttype != token)
throw new CQLParseException("expected " +
lexer.render(token, true) +
- ", " + "got " + lexer.render());
+ ", " + "got " + lexer.render(),
+ par.getPosition());
int tmp = lexer.nextToken();
debug("match() got token=" + lexer.ttype + ", " +
"nval=" + lexer.nval + ", sval='" + lexer.sval + "'" +
// indexes, terms, prefix names and prefix identifiers.
// ### Instead, we should ask the lexer whether what we
// have is a keyword, and let the knowledge reside there.
+ // && binds tighter than ||, so the keyword alternatives must be grouped:
+ // when allowKeywordTerms is false, none of them is accepted as a term.
+ (allowKeywordTerms && (
lexer.ttype == CQLLexer.TT_AND ||
lexer.ttype == CQLLexer.TT_OR ||
lexer.ttype == CQLLexer.TT_NOT ||
lexer.ttype == CQLLexer.TT_PROX ||
- lexer.ttype == CQLLexer.TT_SORTBY) {
+ lexer.ttype == CQLLexer.TT_SORTBY))) {
String symbol = (lexer.ttype == CQLLexer.TT_NUMBER) ?
lexer.render() : lexer.sval;
match(lexer.ttype);
}
throw new CQLParseException("expected " + expected + ", " +
- "got " + lexer.render());
+ "got " + lexer.render(), par.getPosition());
}