X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fmain%2Fjava%2Forg%2Fz3950%2Fzing%2Fcql%2FCQLParser.java;h=6bd6d017d77c0febec905529df98d29cf199e74d;hb=beabf8e9a3c5bd13dfafb0fb4bac0eb9fa0a90e5;hp=58e03269c8aa41bc9196ba9649b9f7e191f1fbbc;hpb=d58739419882639439b40b18fcefeb9e51488fb9;p=cql-java-moved-to-github.git diff --git a/src/main/java/org/z3950/zing/cql/CQLParser.java b/src/main/java/org/z3950/zing/cql/CQLParser.java index 58e0326..6bd6d01 100644 --- a/src/main/java/org/z3950/zing/cql/CQLParser.java +++ b/src/main/java/org/z3950/zing/cql/CQLParser.java @@ -1,27 +1,33 @@ -// $Id: CQLParser.java,v 1.39 2007-08-06 15:54:48 mike Exp $ package org.z3950.zing.cql; import java.io.IOException; -import java.util.Vector; import java.util.Properties; import java.io.InputStream; import java.io.FileInputStream; import java.io.FileNotFoundException; +import java.io.Reader; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; /** * Compiles CQL strings into parse trees of CQLNode subtypes. * - * @version $Id: CQLParser.java,v 1.39 2007-08-06 15:54:48 mike Exp $ * @see http://zing.z3950.org/cql/index.html */ public class CQLParser { private CQLLexer lexer; + private PositionAwareReader par; //active reader with position private int compat; // When false, implement CQL 1.2 - public static int V1POINT1 = 12368; - public static int V1POINT2 = 12369; - public static int V1POINT1SORT = 12370; + private final Set customRelations = new HashSet(); + + public static final int V1POINT1 = 12368; + public static final int V1POINT2 = 12369; + public static final int V1POINT1SORT = 12370; static private boolean DEBUG = false; static private boolean LEXDEBUG = false; @@ -54,6 +60,45 @@ public class CQLParser { if (DEBUG) System.err.println("PARSEDEBUG: " + str); } + + /** + * Registers custom relation in this parser. 
Note that when a custom relation + * is registered the parser is no longer strictly compliant with the chosen spec. + * @param relation + * @return true if custom relation has not been registered already + */ + public boolean registerCustomRelation(String relation) { + return customRelations.add(relation); + } + + /** + * Unregisters previously registered custom relation in this instance of the parser. + * @param relation + * @return true if relation has been previously registered + */ + public boolean unregisterCustomRelation(String relation) { + return customRelations.remove(relation); + } + + /** + * Compiles a CQL query. + *

+ * The resulting parse tree may be further processed by hand (see + * the individual node-types' documentation for details on the + * data structure) or, more often, simply rendered out in the + * desired form using one of the back-ends. toCQL() + * returns a decompiled CQL query equivalent to the one that was + * compiled in the first place; toXCQL() returns an + * XML snippet representing the query; and toPQF() + * returns the query rendered in Index Data's Prefix Query + * Format. + * + * @param cql The query + * @return A CQLNode object which is the root of a parse + * tree representing the query. */ + public CQLNode parse(String cql) throws CQLParseException, IOException { + return parse(new StringReader(cql)); + } /** * Compiles a CQL query. @@ -71,16 +116,18 @@ public class CQLParser { * @param cql The query * @return A CQLNode object which is the root of a parse * tree representing the query. */ - public CQLNode parse(String cql) + public CQLNode parse(Reader cql) throws CQLParseException, IOException { - lexer = new CQLLexer(cql, LEXDEBUG); + par = new PositionAwareReader(cql); + lexer = new CQLLexer(par, LEXDEBUG); lexer.nextToken(); debug("about to parseQuery()"); CQLNode root = parseTopLevelPrefixes("cql.serverChoice", new CQLRelation(compat == V1POINT2 ? 
"=" : "scr")); - if (lexer.ttype != lexer.TT_EOF) - throw new CQLParseException("junk after end: " + lexer.render()); + if (lexer.ttype != CQLLexer.TT_EOF) + throw new CQLParseException("junk after end: " + lexer.render(), + par.getPosition()); return root; } @@ -95,19 +142,19 @@ public class CQLParser { CQLNode node = parseQuery(index, relation); if ((compat == V1POINT2 || compat == V1POINT1SORT) && - lexer.ttype == lexer.TT_SORTBY) { + lexer.ttype == CQLLexer.TT_SORTBY) { match(lexer.ttype); debug("sortspec"); CQLSortNode sortnode = new CQLSortNode(node); - while (lexer.ttype != lexer.TT_EOF) { + while (lexer.ttype != CQLLexer.TT_EOF) { String sortindex = matchSymbol("sort index"); ModifierSet ms = gatherModifiers(sortindex); sortnode.addSortIndex(ms); } if (sortnode.keys.size() == 0) { - throw new CQLParseException("no sort keys"); + throw new CQLParseException("no sort keys", par.getPosition()); } node = sortnode; @@ -121,25 +168,25 @@ public class CQLParser { debug("in parseQuery()"); CQLNode term = parseTerm(index, relation); - while (lexer.ttype != lexer.TT_EOF && + while (lexer.ttype != CQLLexer.TT_EOF && lexer.ttype != ')' && - lexer.ttype != lexer.TT_SORTBY) { - if (lexer.ttype == lexer.TT_AND || - lexer.ttype == lexer.TT_OR || - lexer.ttype == lexer.TT_NOT || - lexer.ttype == lexer.TT_PROX) { + lexer.ttype != CQLLexer.TT_SORTBY) { + if (lexer.ttype == CQLLexer.TT_AND || + lexer.ttype == CQLLexer.TT_OR || + lexer.ttype == CQLLexer.TT_NOT || + lexer.ttype == CQLLexer.TT_PROX) { int type = lexer.ttype; String val = lexer.sval; match(type); ModifierSet ms = gatherModifiers(val); CQLNode term2 = parseTerm(index, relation); - term = ((type == lexer.TT_AND) ? new CQLAndNode(term, term2, ms) : - (type == lexer.TT_OR) ? new CQLOrNode (term, term2, ms) : - (type == lexer.TT_NOT) ? new CQLNotNode(term, term2, ms) : + term = ((type == CQLLexer.TT_AND) ? new CQLAndNode(term, term2, ms) : + (type == CQLLexer.TT_OR) ? 
new CQLOrNode (term, term2, ms) : + (type == CQLLexer.TT_NOT) ? new CQLNotNode(term, term2, ms) : new CQLProxNode(term, term2, ms)); } else { throw new CQLParseException("expected boolean, got " + - lexer.render()); + lexer.render(), par.getPosition()); } } @@ -154,12 +201,13 @@ public class CQLParser { ModifierSet ms = new ModifierSet(base); while (lexer.ttype == '/') { match('/'); - if (lexer.ttype != lexer.TT_WORD) + if (lexer.ttype != CQLLexer.TT_WORD) throw new CQLParseException("expected modifier, " - + "got " + lexer.render()); + + "got " + lexer.render(), + par.getPosition()); String type = lexer.sval.toLowerCase(); match(lexer.ttype); - if (!isRelation()) { + if (!isSymbolicRelation()) { // It's a simple modifier consisting of type only ms.addModifier(type); } else { @@ -192,16 +240,21 @@ public class CQLParser { debug("non-parenthesised term"); word = matchSymbol("index or term"); - if (!isRelation() && lexer.ttype != lexer.TT_WORD) + while (lexer.ttype == CQLLexer.TT_WORD && !isRelation()) { + word = word + " " + lexer.sval; + match(CQLLexer.TT_WORD); + } + + if (!isRelation()) break; index = word; - String relstr = (lexer.ttype == lexer.TT_WORD ? + String relstr = (lexer.ttype == CQLLexer.TT_WORD ? 
lexer.sval : lexer.render(lexer.ttype, false)); relation = new CQLRelation(relstr); match(lexer.ttype); ModifierSet ms = gatherModifiers(relstr); - relation.setModifiers(ms); + relation.ms = ms; debug("index='" + index + ", " + "relation='" + relation.toCQL() + "'"); } @@ -231,17 +284,34 @@ public class CQLParser { return new CQLPrefixNode(name, identifier, node); } - // Checks for a relation private boolean isRelation() { debug("isRelation: checking ttype=" + lexer.ttype + " (" + lexer.render() + ")"); + if (lexer.ttype == CQLLexer.TT_WORD && + (lexer.sval.indexOf('.') >= 0 || + lexer.sval.equals("any") || + lexer.sval.equals("all") || + lexer.sval.equals("within") || + lexer.sval.equals("encloses") || + (lexer.sval.equals("exact") && compat != V1POINT2) || + (lexer.sval.equals("scr") && compat != V1POINT2) || + (lexer.sval.equals("adj") && compat == V1POINT2) || + customRelations.contains(lexer.sval))) + return true; + + return isSymbolicRelation(); + } + + private boolean isSymbolicRelation() { + debug("isSymbolicRelation: checking ttype=" + lexer.ttype + + " (" + lexer.render() + ")"); return (lexer.ttype == '<' || lexer.ttype == '>' || lexer.ttype == '=' || - lexer.ttype == lexer.TT_LE || - lexer.ttype == lexer.TT_GE || - lexer.ttype == lexer.TT_NE || - lexer.ttype == lexer.TT_EQEQ); + lexer.ttype == CQLLexer.TT_LE || + lexer.ttype == CQLLexer.TT_GE || + lexer.ttype == CQLLexer.TT_NE || + lexer.ttype == CQLLexer.TT_EQEQ); } private void match(int token) @@ -250,7 +320,8 @@ public class CQLParser { if (lexer.ttype != token) throw new CQLParseException("expected " + lexer.render(token, true) + - ", " + "got " + lexer.render()); + ", " + "got " + lexer.render(), + par.getPosition()); int tmp = lexer.nextToken(); debug("match() got token=" + lexer.ttype + ", " + "nval=" + lexer.nval + ", sval='" + lexer.sval + "'" + @@ -261,27 +332,27 @@ public class CQLParser { throws CQLParseException, IOException { debug("in matchSymbol()"); - if (lexer.ttype == lexer.TT_WORD 
|| - lexer.ttype == lexer.TT_NUMBER || + if (lexer.ttype == CQLLexer.TT_WORD || + lexer.ttype == CQLLexer.TT_NUMBER || lexer.ttype == '"' || // The following is a complete list of keywords. Because // they're listed here, they can be used unquoted as // indexes, terms, prefix names and prefix identifiers. // ### Instead, we should ask the lexer whether what we // have is a keyword, and let the knowledge reside there. - lexer.ttype == lexer.TT_AND || - lexer.ttype == lexer.TT_OR || - lexer.ttype == lexer.TT_NOT || - lexer.ttype == lexer.TT_PROX || - lexer.ttype == lexer.TT_SORTBY) { - String symbol = (lexer.ttype == lexer.TT_NUMBER) ? + lexer.ttype == CQLLexer.TT_AND || + lexer.ttype == CQLLexer.TT_OR || + lexer.ttype == CQLLexer.TT_NOT || + lexer.ttype == CQLLexer.TT_PROX || + lexer.ttype == CQLLexer.TT_SORTBY) { + String symbol = (lexer.ttype == CQLLexer.TT_NUMBER) ? lexer.render() : lexer.sval; match(lexer.ttype); return symbol; } throw new CQLParseException("expected " + expected + ", " + - "got " + lexer.render()); + "got " + lexer.render(), par.getPosition()); } @@ -348,7 +419,7 @@ public class CQLParser { char mode = 'x'; // x=XCQL, c=CQL, p=PQF String pfile = null; - Vector argv = new Vector(); + List argv = new ArrayList(); for (int i = 0; i < args.length; i++) { argv.add(args[i]); } @@ -397,6 +468,7 @@ public class CQLParser { } CQLParser parser = new CQLParser(compat); + parser.registerCustomRelation("@"); CQLNode root = null; try { root = parser.parse(cql); @@ -421,7 +493,7 @@ public class CQLParser { f.close(); System.out.println(root.toPQF(config)); } else { - System.out.print(root.toXCQL(0)); + System.out.print(root.toXCQL()); } } catch (IOException ex) { System.err.println("Can't render query: " + ex.getMessage());