From f8154c71944186a9b64ddb782082a2026c5a912f Mon Sep 17 00:00:00 2001 From: mike Date: Thu, 31 Oct 2002 22:22:01 +0000 Subject: [PATCH] All sorts of changes. Generally moving towards first release. --- Grammar | 35 ------- README | 41 +++++--- bin/CQLLexer | 6 ++ bin/CQLParser | 6 ++ etc/Grammar | 35 +++++++ etc/grammar-mail | 138 ++++++++++++++++++++++++++ src/org/z3950/zing/cql/CQLAndNode.java | 6 +- src/org/z3950/zing/cql/CQLBooleanNode.java | 14 ++- src/org/z3950/zing/cql/CQLLexer.java | 149 ++++++++++++++++++---------- src/org/z3950/zing/cql/CQLNode.java | 47 ++------- src/org/z3950/zing/cql/CQLNotNode.java | 6 +- src/org/z3950/zing/cql/CQLOrNode.java | 6 +- src/org/z3950/zing/cql/CQLParser.java | 118 ++++++++++++++++++---- src/org/z3950/zing/cql/CQLProxNode.java | 37 +++++++ src/org/z3950/zing/cql/CQLRelation.java | 61 ++---------- src/org/z3950/zing/cql/CQLTermNode.java | 17 ++-- src/org/z3950/zing/cql/Makefile | 7 +- src/org/z3950/zing/cql/ModifierSet.java | 95 ++++++++++++++++++ 18 files changed, 589 insertions(+), 235 deletions(-) delete mode 100644 Grammar create mode 100755 bin/CQLLexer create mode 100755 bin/CQLParser create mode 100644 etc/Grammar create mode 100644 etc/grammar-mail create mode 100644 src/org/z3950/zing/cql/CQLProxNode.java create mode 100644 src/org/z3950/zing/cql/ModifierSet.java diff --git a/Grammar b/Grammar deleted file mode 100644 index bd51793..0000000 --- a/Grammar +++ /dev/null @@ -1,35 +0,0 @@ -$Id: Grammar,v 1.1 2002-10-30 09:19:26 mike Exp $ - -This is the CQL grammar, more or less as on the official Maintenance -Agency page (http://lcweb.loc.gov/z3950/agency/zing/srwu/cql.html) but -with a few tweaks described in my message of Tue, 29 Oct 2002 14:11:48 -which I hope will be integrated into the official grammar. - --- - -cql-query ::= cql-query boolean search-clause - | search-clause -boolean ::= "and" | "or" | "not" | prox -search-clause ::= "(" cql-query ")" - | [ qualifier relation ] term - -relation ::= base-relation { "/" relation-modifier } -base-relation ::= numeric-relation | "exact" | "all" | "any" -relation-modifier ::= "relevant" | "fuzzy" | "stem" -numeric-relation ::= "<" | ">" | "<=" | ">=" | "<>" | "=" - -prox ::= "prox" [ "/" prox-parameters ] -prox-parameters ::= [ numeric-relation ] "/" [ distance ] "/" [ unit ] "/" ordering - | [ numeric-relation ] "/" [ distance ] "/" unit - | [ numeric-relation ] "/" distance - | numeric-relation -unit ::= "word" | "sentence" | "paragraph" | "element" -ordering ::= "ordered" | "unordered" -distance ::= non-negative-integer - -qualifier ::= [ qualifier-prefix "." ] qualifier-name -qualifier-prefix ::= identifier -qualifier-name ::= identifier -identifer ::= string -term ::= string | ""string"" -string ::= a character string diff --git a/README b/README index 590fff1..ed5afc8 100644 --- a/README +++ b/README @@ -1,4 +1,4 @@ -$Id: README,v 1.6 2002-10-30 11:13:18 mike Exp $ +$Id: README,v 1.7 2002-10-31 22:22:01 mike Exp $ cql-java -- a free CQL compiler for Java @@ -53,8 +53,8 @@ Library: DESCRIPTION ----------- -Se the automatically generated class documentation in the "doc" -subdirectory. (### It's not there yet, of course) +See the automatically generated class documentation in the "doc" +subdirectory. (It's not all there yet, but it's coming.) AUTHOR @@ -83,19 +83,37 @@ All the other free CQL compilers everyone's going to write :-) TO DO ----- -* Add proximity support to parser +* Add proximity support to parser -- just the back-ends left to do. -* Some niceties for the CQL-decompiling back-end: - * Don't emit redundant parentheses. - * Don't put spaces around relations that don't need them. +* Relation modifiers could be limited to known modifiers only. -* Write PQN-generating back-end (will need to be driven from a +* Fix CQLParser and CQLLexer shell-script front-ends to elegantly + handle their classes' test harnesses' ability to read the query from + the command-line arguments, if any, falling back to stdin if there + are none. + +* Add CQLGenerate shell-script. Allow CQLGenerate test-harness to + take some arguments on command-line as well as or instead of a + file. + +* Trivial CQLCanonicalise application, which renders out its source + tree in a canonical form, enabling queries to be diffed for + semantically significant differences only. Tests can be run by + generating random trees, canonicalising them, then canonicalising + them _again_ and checking that the before-and-after results are the + same. + +* Some niceties for the cql-decompiling back-end: + * don't emit redundant parentheses. + * don't put spaces around relations that don't need them. + +* Write pqn-generating back-end (will need to be driven from a configuation file specifying how to represent the qualifiers, - relations, relation modifiers and wildcard characters as Z39.50 + relations, relation modifiers and wildcard characters as z39.50 attributes.) * Consider the utility of yet another back-end that translates a - CQLNode tree into a Type-1 query tree using the JZKit data + cqlnode tree into a type-1 query tree using the jzkit data structures. That would be nice so that CQL could become a JZKit query-type, but you could achieve the same effect by generating PQN, and running that through JZKit's existing PQN-to-Type-1 compiler. @@ -113,6 +131,3 @@ TO DO * Write generic test suite. -* Fix CQLParser test harness to read query from command-line - arguments, if any, falling back to stdin if there are none. - diff --git a/bin/CQLLexer b/bin/CQLLexer new file mode 100755 index 0000000..c7a5822 --- /dev/null +++ b/bin/CQLLexer @@ -0,0 +1,6 @@ +#!/bin/sh + +# $Id: CQLLexer,v 1.1 2002-10-31 22:22:01 mike Exp $ +# Trivial script to invoke the CQLLexer test-harness + +cat | java org.z3950.zing.cql.CQLLexer ${@+"$@"} diff --git a/bin/CQLParser b/bin/CQLParser new file mode 100755 index 0000000..9576625 --- /dev/null +++ b/bin/CQLParser @@ -0,0 +1,6 @@ +#!/bin/sh + +# $Id: CQLParser,v 1.1 2002-10-31 22:22:01 mike Exp $ +# Trivial script to invoke the CQLParser test-harness + +cat | java org.z3950.zing.cql.CQLParser ${@+"$@"} diff --git a/etc/Grammar b/etc/Grammar new file mode 100644 index 0000000..ea1ad0e --- /dev/null +++ b/etc/Grammar @@ -0,0 +1,35 @@ +$Id: Grammar,v 1.1 2002-10-31 22:22:01 mike Exp $ + +This is the CQL grammar, more or less as on the official Maintenance +Agency page (http://lcweb.loc.gov/z3950/agency/zing/srwu/cql.html) but +with a few tweaks described in my message of Tue, 29 Oct 2002 14:11:48 +which I hope will be integrated into the official grammar. + +-- + +cql-query ::= cql-query boolean search-clause + | search-clause +boolean ::= "and" | "or" | "not" | prox +search-clause ::= "(" cql-query ")" + | [ qualifier relation ] term + +relation ::= base-relation { "/" relation-modifier } +base-relation ::= numeric-relation | "exact" | "all" | "any" +relation-modifier ::= "relevant" | "fuzzy" | "stem" +numeric-relation ::= "<" | ">" | "<=" | ">=" | "<>" | "=" + +prox ::= "prox" [ "/" prox-parameters ] +prox-parameters ::= [ numeric-relation ] "/" [ distance ] "/" [ unit ] "/" ordering + | [ numeric-relation ] "/" [ distance ] "/" unit + | [ numeric-relation ] "/" distance + | numeric-relation +unit ::= "word" | "sentence" | "paragraph" | "element" +ordering ::= "ordered" | "unordered" +distance ::= non-negative-integer + +qualifier ::= [ qualifier-prefix "." ] qualifier-name +qualifier-prefix ::= identifier +qualifier-name ::= identifier +identifer ::= string +term ::= string | ""string"" +string ::= a character string diff --git a/etc/grammar-mail b/etc/grammar-mail new file mode 100644 index 0000000..21cce8f --- /dev/null +++ b/etc/grammar-mail @@ -0,0 +1,138 @@ +From mike@seatbooker.net Tue Oct 29 15:12:09 2002 +Envelope-to: mike@miketaylor.org.uk +Date: Tue, 29 Oct 2002 14:11:48 GMT +From: Mike Taylor +To: ZNG@loc.gov +Cc: mike@miketaylor.org.uk +Subject: Again: Grammar Tweaks + +Dear Everyone, + +I sent this message last Friday, and didn't get a delivery failure +message or anything similar; but there has been absolutely zero +response on-list, which makes me wonder whether it mysteriously didn't +get through. + +... or surely it didn't get caught by people's "this message is too +complicated to pay attention to" filters? :-~ + + _/|_ _______________________________________________________________ +/o ) \/ Mike Taylor www.miketaylor.org.uk +)_v__/\ "Conclusion: is left to the reader (see Table 2). + Acknowledgements: I wrote this paper for money" -- + A. A. Chastel, _A critical analysis of the explanation of + red-shifts by a new field_, A&A 53, 67 (1976) + + +------------------------------- cut here ------------------------------- +Well, it looks like the CQL grammar has settled down more or less to +everyone's satisfaction. So it must be time to throw it all up the +air again! :-) + +No, I'm joking -- mostly. I'd like to point one actual mistake (I +think), suggest one substantive change, and request a few cosmetic +changes. + +For anyone who's not got it to hand, the URL for the grammar is +http://lcweb.loc.gov/z3950/agency/zing/srwu/cql.html + +1. I think it's a mistake that the grammar says: + prox-qualifiers ::= "/" [ unit ] "/" [ relation ] "/" [ distance ] "/" ordering + (and the similar productions that follow) because that allows + prox/word/exact/3 <--- "exact" is meaningless here + and -- even worse -- + prox/word/=/stem <--- a relation-modifier! + (This is not only silly, but ambiguous too) + + So I think all the occurrences of "relation" in the productions + for prox need to be changed to "order-or-equal-relation". + +2. The only thing that I'm suggesting we actually _change_ is the + order of the proximity parameters. Quick! Close your eyes and + tell me the correct order of relation, ordering, distance and + unit? See -- you can't do it: no-one can :-) + + So, based somewhat on Adam's rather more difficult suggestion of + a couple of days ago, I propose that we change the order to: + relation/distance/unit/ordering + Rationale: you can read it out loud. If you want to find two + clauses with the conditions "*more* than *5* *sentences* apart", + you would write ``foo prox/>/5/sentence bar''. + +3. Cosmetic changes. + + 3a. The "/" at the beginning of each of the prox-qualfiers + productions can be moved up into the definition of prox, like + this: + prox::= "prox" [ "/" prox-qualifiers ] + which yields a slightly simpler, neater (but equivalent) + grammar. + + 3b. The things that the grammar called "index-name", we have been + calling "qualifiers" (and talking about the "qualifier-sets" + that contain them.) I think that's a much nicer name than + "index-name", in part because it doesn't carry such a loading + of implementation detail. Also, remember that we way we've + designed things, a qualifier will typically implemented by + multiple indexes (a word index and a string index) so I don't + want to give misleading impressions. + + 3b1. :-) + That would mean that, in the name of simplicity, we'd + need to rename "prox-qualifiers" to something like + "prox-modifiers" or "prox-parameters" (which is what + we've actually been calling them, 4WIW) and rename + "qualifier" to something more suggestive such as + "relation-modifier" (which, again, is what we've been + using in prose.) + + 3c. (Nearly done, honest.) I think that + "order-or-equal-relation" is a horrible name and would much + prefer to call it something like "numeric-relation", which + better explains its role in, for example, proximity + parameters. + +So, putting it all together, here's how I think the grammar should +look: + +------------------------------- cut here ------------------------------- +cql-query ::= cql-query boolean search-clause + | search-clause +boolean ::= "and" | "or" | "not" | prox +search-clause ::= "(" cql-query ")" + | [ qualifier relation ] term + +relation ::= base-relation { "/" relation-modifier } +base-relation ::= numeric-relation | "exact" | "all" | "any" +relation-modifier ::= "relevant" | "fuzzy" | "stem" +numeric-relation ::= "<" | ">" | "<=" | ">=" | "<>" | "=" + +prox ::= "prox" [ "/" prox-parameters ] +prox-parameters ::= [ numeric-relation ] "/" [ distance ] "/" [ unit ] "/" ordering + | [ numeric-relation ] "/" [ distance ] "/" unit + | [ numeric-relation ] "/" distance + | numeric-relation +unit ::= "word" | "sentence" | "paragraph" | "element" +ordering ::= "ordered" | "unordered" +distance ::= non-negative-integer + +qualifier ::= [ qualifier-prefix "." ] qualifier-name +qualifier-prefix ::= identifier +qualifier-name ::= identifier +identifer ::= string +term ::= string | ""string"" +string ::= a character string +------------------------------- cut here ------------------------------- + +Hope this helps, and that it's none of it's controversial. I guess it +ought not to be, except maybe the change in the order of proximity +parameters. + + _/|_ _______________________________________________________________ +/o ) \/ Mike Taylor www.miketaylor.org.uk +)_v__/\ The IBM 360 had no stack, and that was stupid, short-sighted + design. The Cray 2 has no stack either, but that's elegant + minimalism. + + + diff --git a/src/org/z3950/zing/cql/CQLAndNode.java b/src/org/z3950/zing/cql/CQLAndNode.java index 1893a3e..57cde12 100644 --- a/src/org/z3950/zing/cql/CQLAndNode.java +++ b/src/org/z3950/zing/cql/CQLAndNode.java @@ -1,13 +1,13 @@ -// $Id: CQLAndNode.java,v 1.3 2002-10-30 09:19:26 mike Exp $ +// $Id: CQLAndNode.java,v 1.4 2002-10-31 22:22:01 mike Exp $ package org.z3950.zing.cql; /** * Represents an AND node in a CQL parse-tree. - * ### + * ## * - * @version $Id: CQLAndNode.java,v 1.3 2002-10-30 09:19:26 mike Exp $ + * @version $Id: CQLAndNode.java,v 1.4 2002-10-31 22:22:01 mike Exp $ */ public class CQLAndNode extends CQLBooleanNode { public CQLAndNode(CQLNode left, CQLNode right) { diff --git a/src/org/z3950/zing/cql/CQLBooleanNode.java b/src/org/z3950/zing/cql/CQLBooleanNode.java index 489e39b..56c212f 100644 --- a/src/org/z3950/zing/cql/CQLBooleanNode.java +++ b/src/org/z3950/zing/cql/CQLBooleanNode.java @@ -1,13 +1,13 @@ -// $Id: CQLBooleanNode.java,v 1.5 2002-10-30 09:19:26 mike Exp $ +// $Id: CQLBooleanNode.java,v 1.6 2002-10-31 22:22:01 mike Exp $ package org.z3950.zing.cql; /** * Represents a boolean node in a CQL parse-tree. - * ### + * ## * - * @version $Id: CQLBooleanNode.java,v 1.5 2002-10-30 09:19:26 mike Exp $ + * @version $Id: CQLBooleanNode.java,v 1.6 2002-10-31 22:22:01 mike Exp $ */ public abstract class CQLBooleanNode extends CQLNode { protected CQLNode left; @@ -17,12 +17,18 @@ public abstract class CQLBooleanNode extends CQLNode { String toXCQL(int level) { return (indent(level) + "\n" + - indent(level+1) + "" + op() + "\n" + + booleanXQL(level+1) + left.toXCQL(level+1) + right.toXCQL(level+1) + indent(level) + "\n"); } + String booleanXQL(int level) { + return(indent(level) + "\n" + + indent(level+1) + "" + op() + "\n" + + indent(level) + "\n"); + } + String toCQL() { // ### We don't always need parens around the operands return "(" + left.toCQL() + ") " + op() + " (" + right.toCQL() + ")"; diff --git a/src/org/z3950/zing/cql/CQLLexer.java b/src/org/z3950/zing/cql/CQLLexer.java index 1dc580e..52b81f7 100644 --- a/src/org/z3950/zing/cql/CQLLexer.java +++ b/src/org/z3950/zing/cql/CQLLexer.java @@ -1,32 +1,76 @@ -// $Id: CQLLexer.java,v 1.1 2002-10-30 09:19:26 mike Exp $ +// $Id: CQLLexer.java,v 1.2 2002-10-31 22:22:01 mike Exp $ package org.z3950.zing.cql; import java.io.StreamTokenizer; import java.io.StringReader; +import java.util.Hashtable; -// This is a trivial subclass for java.io.StreamTokenizer which knows -// about the multi-character tokens "<=", ">=" and "<>", and includes -// a render() method. Used only by CQLParser. +// This is a semi-trivial subclass for java.io.StreamTokenizer that: +// * Has a halfDecentPushBack() method that actually works +// * Includes a render() method +// * Knows about the multi-character tokens "<=", ">=" and "<>" +// * Recognises a set of keywords as tokens in their own right +// * Includes some primitive debugging-output facilities +// It's used only by CQLParser. // class CQLLexer extends StreamTokenizer { - private static boolean DEBUG; - static int TT_LE = 1000; // The "<=" relation - static int TT_GE = 1001; // The ">=" relation - static int TT_NE = 1002; // The "<>" relation - static int TT_AND = 1003; // The "and" boolean - static int TT_OR = 1004; // The "or" boolean - static int TT_NOT = 1005; // The "not" boolean - static int TT_PROX = 1006; // The "prox" boolean - static int TT_ANY = 1007; // The "any" relation - static int TT_ALL = 1008; // The "all" relation - static int TT_EXACT = 1009; // The "exact" relation + // New publicly visible token-types + static int TT_LE = 1000; // The "<=" relation + static int TT_GE = 1001; // The ">=" relation + static int TT_NE = 1002; // The "<>" relation + static int TT_AND = 1003; // The "and" boolean + static int TT_OR = 1004; // The "or" boolean + static int TT_NOT = 1005; // The "not" boolean + static int TT_PROX = 1006; // The "prox" boolean + static int TT_ANY = 1007; // The "any" relation + static int TT_ALL = 1008; // The "all" relation + static int TT_EXACT = 1009; // The "exact" relation + static int TT_pWORD = 1010; // The "word" proximity unit + static int TT_SENTENCE = 1011; // The "sentence" proximity unit + static int TT_PARAGRAPH = 1012; // The "paragraph" proximity unit + static int TT_ELEMENT = 1013; // The "element" proximity unit + static int TT_ORDERED = 1014; // The "ordered" proximity ordering + static int TT_UNORDERED = 1015; // The "unordered" proximity ordering + + // Support for keywords. It would be nice to compile this linear + // list into a Hashtable, but it's hard to store ints as hash + // values, and next to impossible to use them as hash keys. So + // we'll just scan the (very short) list every time we need to do + // a lookup. + private class Keyword { + int token; + String keyword; + Keyword(int token, String keyword) { + this.token = token; + this.keyword = keyword; + } + } + // This should logically be static, but Java won't allow it :-P + private Keyword[] keywords = { + new Keyword(TT_AND, "and"), + new Keyword(TT_OR, "or"), + new Keyword(TT_NOT, "not"), + new Keyword(TT_PROX, "prox"), + new Keyword(TT_ANY, "any"), + new Keyword(TT_ALL, "all"), + new Keyword(TT_EXACT, "exact"), + new Keyword(TT_pWORD, "word"), + new Keyword(TT_SENTENCE, "sentence"), + new Keyword(TT_PARAGRAPH, "paragraph"), + new Keyword(TT_ELEMENT, "element"), + new Keyword(TT_ORDERED, "ordered"), + new Keyword(TT_UNORDERED, "unordered"), + }; // For halfDecentPushBack() and the code at the top of nextToken() private static int TT_UNDEFINED = -1000; - int saved_ttype = TT_UNDEFINED; - double saved_nval; - String saved_sval; + private int saved_ttype = TT_UNDEFINED; + private double saved_nval; + private String saved_sval; + + // Controls debugging output + private static boolean DEBUG; CQLLexer(String cql, boolean lexdebug) { super(new StringReader(cql)); @@ -37,6 +81,7 @@ class CQLLexer extends StreamTokenizer { ordinaryChar('('); ordinaryChar(')'); wordChars('\'', '\''); // prevent this from introducing strings + parseNumbers(); DEBUG = lexdebug; } @@ -113,23 +158,11 @@ class CQLLexer extends StreamTokenizer { // public int underlyingNextToken() throws java.io.IOException { super.nextToken(); - if (ttype == TT_WORD) { - if (sval.equalsIgnoreCase("and")) { - ttype = TT_AND; - } else if (sval.equalsIgnoreCase("or")) { - ttype = TT_OR; - } else if (sval.equalsIgnoreCase("not")) { - ttype = TT_NOT; - } else if (sval.equalsIgnoreCase("prox")) { - ttype = TT_PROX; - } else if (sval.equalsIgnoreCase("any")) { - ttype = TT_ANY; - } else if (sval.equalsIgnoreCase("all")) { - ttype = TT_ALL; - } else if (sval.equalsIgnoreCase("exact")) { - ttype = TT_EXACT; - } - } + if (ttype == TT_WORD) + for (int i = 0; i < keywords.length; i++) + if (sval.equalsIgnoreCase(keywords[i].keyword)) + ttype = keywords[i].token; + return ttype; } @@ -142,7 +175,7 @@ class CQLLexer extends StreamTokenizer { if (token == TT_EOF) { return "EOF"; } else if (token == TT_NUMBER) { - return "number: " + nval; + return new Integer((int) nval).toString(); } else if (token == TT_WORD) { return "word: " + sval; } else if (token == '"') { @@ -153,31 +186,43 @@ class CQLLexer extends StreamTokenizer { return ">="; } else if (token == TT_NE) { return "<>"; - } else if (token == TT_AND) { - return "and"; - } else if (token == TT_OR) { - return "or"; - } else if (token == TT_NOT) { - return "not"; - } else if (token == TT_PROX) { - return "prox"; - } else if (token == TT_ANY) { - return "any"; - } else if (token == TT_ALL) { - return "all"; - } else if (token == TT_EXACT) { - return "exact"; } + // Check whether its associated with one of the keywords + for (int i = 0; i < keywords.length; i++) + if (token == keywords[i].token) + return keywords[i].keyword; + + // Otherwise it must be a single character, such as '(' or '/'. String res = String.valueOf((char) token); if (quoteChars) res = "'" + res + "'"; return res; } public static void main(String[] args) throws Exception { - CQLLexer lexer = new CQLLexer(args[0], true); - int token; + if (args.length > 1) { + System.err.println("Usage: CQLLexer []"); + System.err.println("If unspecified, query is read from stdin"); + System.exit(1); + } + String cql; + if (args.length == 1) { + cql = args[0]; + } else { + byte[] bytes = new byte[10000]; + try { + // Read in the whole of standard input in one go + int nbytes = System.in.read(bytes); + } catch (java.io.IOException ex) { + System.err.println("Can't read query: " + ex.getMessage()); + System.exit(2); + } + cql = new String(bytes); + } + + CQLLexer lexer = new CQLLexer(cql, true); + int token; while ((token = lexer.nextToken()) != TT_EOF) { // Nothing to do: debug() statements render tokens for us } diff --git a/src/org/z3950/zing/cql/CQLNode.java b/src/org/z3950/zing/cql/CQLNode.java index 5d731b7..4eb8023 100644 --- a/src/org/z3950/zing/cql/CQLNode.java +++ b/src/org/z3950/zing/cql/CQLNode.java @@ -1,56 +1,21 @@ -// $Id: CQLNode.java,v 1.8 2002-10-30 11:13:18 mike Exp $ +// $Id: CQLNode.java,v 1.9 2002-10-31 22:22:01 mike Exp $ package org.z3950.zing.cql; /** * Represents a node in a CQL parse-tree. - * ### + * ## * - * @version $Id: CQLNode.java,v 1.8 2002-10-30 11:13:18 mike Exp $ + * @version $Id: CQLNode.java,v 1.9 2002-10-31 22:22:01 mike Exp $ */ public abstract class CQLNode { abstract String toXCQL(int level); abstract String toCQL(); - protected String indent(int level) { - String x = ""; - while (level-- > 0) { - x += " "; - } - return x; - } - - // XML Quote -- - // s/&/&/g; - // s//>/g; - // This is hideously inefficient, but I just don't see a better - // way using the standard JAVA library. - // - protected String xq(String str) { - str = replace(str, "&", "&"); - str = replace(str, "<", "<"); - str = replace(str, ">", ">"); - return str; - } - - // I can't _believe_ I have to write this by hand in 2002 ... - protected static String replace(String str, String from, String to) { - StringBuffer sb = new StringBuffer(); - int ix; // index of next `from' - int offset = 0; // index of previous `from' + length(from) - - while ((ix = str.indexOf(from, offset)) != -1) { - sb.append(str.substring(offset, ix)); - sb.append(to); - offset = ix + from.length(); - } - - // End of string: append last bit and we're done - sb.append(str.substring(offset)); - return sb.toString(); - } + // Utility-function abbreviations for the use of subclasses + protected static String indent(int level) { return Utils.indent(level); } + protected static String xq(String str) { return Utils.xq(str); } // Test harness public static void main (String[] args) { diff --git a/src/org/z3950/zing/cql/CQLNotNode.java b/src/org/z3950/zing/cql/CQLNotNode.java index 312cf94..3eb1f7a 100644 --- a/src/org/z3950/zing/cql/CQLNotNode.java +++ b/src/org/z3950/zing/cql/CQLNotNode.java @@ -1,13 +1,13 @@ -// $Id: CQLNotNode.java,v 1.3 2002-10-30 09:19:26 mike Exp $ +// $Id: CQLNotNode.java,v 1.4 2002-10-31 22:22:01 mike Exp $ package org.z3950.zing.cql; /** * Represents a NOT node in a CQL parse-tree. - * ### + * ## * - * @version $Id: CQLNotNode.java,v 1.3 2002-10-30 09:19:26 mike Exp $ + * @version $Id: CQLNotNode.java,v 1.4 2002-10-31 22:22:01 mike Exp $ */ public class CQLNotNode extends CQLBooleanNode { public CQLNotNode(CQLNode left, CQLNode right) { diff --git a/src/org/z3950/zing/cql/CQLOrNode.java b/src/org/z3950/zing/cql/CQLOrNode.java index e24bb04..7dc253c 100644 --- a/src/org/z3950/zing/cql/CQLOrNode.java +++ b/src/org/z3950/zing/cql/CQLOrNode.java @@ -1,13 +1,13 @@ -// $Id: CQLOrNode.java,v 1.3 2002-10-30 09:19:26 mike Exp $ +// $Id: CQLOrNode.java,v 1.4 2002-10-31 22:22:01 mike Exp $ package org.z3950.zing.cql; /** * Represents an OR node in a CQL parse-tree. - * ### + * ## * - * @version $Id: CQLOrNode.java,v 1.3 2002-10-30 09:19:26 mike Exp $ + * @version $Id: CQLOrNode.java,v 1.4 2002-10-31 22:22:01 mike Exp $ */ public class CQLOrNode extends CQLBooleanNode { public CQLOrNode(CQLNode left, CQLNode right) { diff --git a/src/org/z3950/zing/cql/CQLParser.java b/src/org/z3950/zing/cql/CQLParser.java index e463576..0aded47 100644 --- a/src/org/z3950/zing/cql/CQLParser.java +++ b/src/org/z3950/zing/cql/CQLParser.java @@ -1,4 +1,4 @@ -// $Id: CQLParser.java,v 1.10 2002-10-30 09:19:26 mike Exp $ +// $Id: CQLParser.java,v 1.11 2002-10-31 22:22:01 mike Exp $ package org.z3950.zing.cql; import java.io.IOException; @@ -6,9 +6,9 @@ import java.io.IOException; /** * Compiles a CQL string into a parse tree. - * ### + * ## * - * @version $Id: CQLParser.java,v 1.10 2002-10-30 09:19:26 mike Exp $ + * @version $Id: CQLParser.java,v 1.11 2002-10-31 22:22:01 mike Exp $ * @see http://zing.z3950.org/cql/index.html */ @@ -55,7 +55,12 @@ public class CQLParser { CQLNode term2 = parse_term(qualifier, relation); term = new CQLNotNode(term, term2); } else if (lexer.ttype == lexer.TT_PROX) { - // ### Handle "prox" + match(lexer.TT_PROX); + CQLProxNode proxnode = new CQLProxNode(term); + gatherProxParameters(proxnode); + CQLNode term2 = parse_term(qualifier, relation); + proxnode.addSecondSubterm(term2); + term = (CQLNode) proxnode; } else { throw new CQLParseException("expected boolean, got " + lexer.render()); @@ -112,18 +117,86 @@ public class CQLParser { return node; } + private void gatherProxParameters(CQLProxNode node) + throws CQLParseException, IOException { + for (int i = 0; i < 4; i++) { + if (lexer.ttype != '/') + return; // end of proximity parameters + + match('/'); + if (lexer.ttype != '/') { + // not an omitted default + switch (i) { + // Assumes order is: relation/distance/unit/ordering + case 0: gatherProxRelation(node); break; + case 1: gatherProxDistance(node); break; + case 2: gatherProxUnit(node); break; + case 3: gatherProxOrdering(node); break; + } + } + } + } + + private void gatherProxRelation(CQLProxNode node) + throws CQLParseException, IOException { + if (!isProxRelation()) + throw new CQLParseException("expected proximity relation, got " + + lexer.render()); + node.addModifier("relation", lexer.render(lexer.ttype, false)); + match(lexer.ttype); + debug("gPR matched " + lexer.render(lexer.ttype, false)); + } + + private void gatherProxDistance(CQLProxNode node) + throws CQLParseException, IOException { + if (lexer.ttype != lexer.TT_NUMBER) + throw new CQLParseException("expected proximity distance, got " + + lexer.render()); + node.addModifier("distance", lexer.render(lexer.ttype, false)); + match(lexer.ttype); + debug("gPD matched " + lexer.render(lexer.ttype, false)); + } + + private void gatherProxUnit(CQLProxNode node) + throws CQLParseException, IOException { + if (lexer.ttype != lexer.TT_pWORD && + lexer.ttype != lexer.TT_SENTENCE && + lexer.ttype != lexer.TT_PARAGRAPH && + lexer.ttype != lexer.TT_ELEMENT) + throw new CQLParseException("expected proximity unit, got " + + lexer.render()); + node.addModifier("unit", lexer.render()); + match(lexer.ttype); + } + + private void gatherProxOrdering(CQLProxNode node) + throws CQLParseException, IOException { + if (lexer.ttype != lexer.TT_ORDERED && + lexer.ttype != lexer.TT_UNORDERED) + throw new CQLParseException("expected proximity ordering, got " + + lexer.render()); + node.addModifier("ordering", lexer.render()); + match(lexer.ttype); + } + boolean isBaseRelation() { debug("isBaseRelation: checking ttype=" + lexer.ttype + " (" + lexer.render() + ")"); + return (isProxRelation() || + lexer.ttype == lexer.TT_ANY || + lexer.ttype == lexer.TT_ALL || + lexer.ttype == lexer.TT_EXACT); + } + + boolean isProxRelation() { + debug("isProxRelation: checking ttype=" + lexer.ttype + + " (" + lexer.render() + ")"); return (lexer.ttype == '<' || lexer.ttype == '>' || lexer.ttype == '=' || lexer.ttype == lexer.TT_LE || lexer.ttype == lexer.TT_GE || - lexer.ttype == lexer.TT_NE || - lexer.ttype == lexer.TT_ANY || - lexer.ttype == lexer.TT_ALL || - lexer.ttype == lexer.TT_EXACT); + lexer.ttype == lexer.TT_NE); } private void match(int token) @@ -168,26 +241,33 @@ public class CQLParser { // // public static void main (String[] args) { - if (args.length != 0) { - System.err.println("Usage: " + args[0]); + if (args.length > 1) { + System.err.println("Usage: CQLParser []"); + System.err.println("If unspecified, query is read from stdin"); System.exit(1); } - byte[] bytes = new byte[10000]; - try { - // Read in the whole of standard input in one go - int nbytes = System.in.read(bytes); - } catch (java.io.IOException ex) { - System.err.println("Can't read query: " + ex.getMessage()); - System.exit(2); + String cql; + if (args.length == 1) { + cql = args[0]; + } else { + byte[] bytes = new byte[10000]; + try { + // Read in the whole of standard input in one go + int nbytes = System.in.read(bytes); + } catch (java.io.IOException ex) { + System.err.println("Can't read query: " + ex.getMessage()); + System.exit(2); + } + cql = new String(bytes); } - String cql = new String(bytes); + CQLParser parser = new CQLParser(); CQLNode root; try { root = parser.parse(cql); debug("root='" + root + "'"); - System.out.println(root.toXCQL(0)); + System.out.println(root.toCQL()); } catch (CQLParseException ex) { System.err.println("Syntax error: " + ex.getMessage()); System.exit(3); diff --git a/src/org/z3950/zing/cql/CQLProxNode.java b/src/org/z3950/zing/cql/CQLProxNode.java new file mode 100644 index 0000000..1b9abe9 --- /dev/null +++ b/src/org/z3950/zing/cql/CQLProxNode.java @@ -0,0 +1,37 @@ +// $Id: CQLProxNode.java,v 1.1 2002-10-31 22:22:01 mike Exp $ + +package org.z3950.zing.cql; + + +/** + * Represents a proximity node in a CQL parse-tree. + * ## + * + * @version $Id: CQLProxNode.java,v 1.1 2002-10-31 22:22:01 mike Exp $ + */ +public class CQLProxNode extends CQLBooleanNode { + ModifierSet ms; + + public CQLProxNode(CQLNode left) { + ms = new ModifierSet("prox"); + this.left = left; + // this.right left unresolved for now ... + } + + // ... delayed "second half" of the constructor + public void addSecondSubterm(CQLNode right) { + this.right = right; + } + + String op() { + return ms.toCQL(); + } + + public void addModifier(String type, String value) { + ms.addModifier(type, value); + } + + String booleanXQL(int level) { + return ms.toXCQL(level, "boolean"); + } +} diff --git a/src/org/z3950/zing/cql/CQLRelation.java b/src/org/z3950/zing/cql/CQLRelation.java index 3de9f57..ff8db8f 100644 --- a/src/org/z3950/zing/cql/CQLRelation.java +++ b/src/org/z3950/zing/cql/CQLRelation.java @@ -1,4 +1,4 @@ -// $Id: CQLRelation.java,v 1.1 2002-10-30 09:19:26 mike Exp $ +// $Id: CQLRelation.java,v 1.2 2002-10-31 22:22:01 mike Exp $ package org.z3950.zing.cql; import java.util.Vector; @@ -6,71 +6,26 @@ import java.lang.StringBuffer; /** * Represents a relation between a CQL qualifier and term. - * ### + * ## * - * @version $Id: CQLRelation.java,v 1.1 2002-10-30 09:19:26 mike Exp $ + * @version $Id: CQLRelation.java,v 1.2 2002-10-31 22:22:01 mike Exp $ */ public class CQLRelation extends CQLNode { - String base; - Vector modifiers; + ModifierSet ms; public CQLRelation(String base) { - this.base = base; - modifiers = new Vector(); + ms = new ModifierSet(base); } public void addModifier(String modifier) { - modifiers.add(modifier); - } - - public String[] getModifiers() { - int n = modifiers.size(); - String[] res = new String[n]; - for (int i = 0; i < n; i++) { - res[i] = (String) modifiers.get(i); - } - - return res; + ms.addModifier(null, modifier); } public String toXCQL(int level) { - StringBuffer buf = new StringBuffer(); - buf.append (indent(level) + "\n" + - indent(level+1) + "" + xq(base) + "\n"); - String[] mods = getModifiers(); - if (mods.length > 0) { - buf.append(indent(level+1) + "\n"); - for (int i = 0; i < mods.length; i++) - buf.append(indent(level+2)). - append(""). append(mods[i]). - append("\n"); - buf.append(indent(level+1) + "\n"); - } - buf.append(indent(level) + "\n"); - return buf.toString(); + return ms.toXCQL(level, "relation"); } public String toCQL() { - StringBuffer buf = new StringBuffer(base); - String[] mods = getModifiers(); - for (int i = 0; i < mods.length; i++) { - buf.append("/").append(mods[i]); - } - - return buf.toString(); - } - - public static void main(String[] args) { - if (args.length < 1) { - System.err.println("Usage: CQLRelation ..."); - System.exit(1); - } - - CQLRelation res = new CQLRelation(args[0]); - for (int i = 1; i < args.length; i++) { - res.addModifier(args[i]); - } - - System.out.println(res.toCQL()); + return ms.toCQL(); } } diff --git a/src/org/z3950/zing/cql/CQLTermNode.java b/src/org/z3950/zing/cql/CQLTermNode.java index 5c149a9..fda7147 100644 --- a/src/org/z3950/zing/cql/CQLTermNode.java +++ b/src/org/z3950/zing/cql/CQLTermNode.java @@ -1,13 +1,13 @@ -// $Id: CQLTermNode.java,v 1.5 2002-10-30 09:19:26 mike Exp $ +// $Id: CQLTermNode.java,v 1.6 2002-10-31 22:22:01 mike Exp $ package org.z3950.zing.cql; /** * Represents a terminal node in a CQL parse-tree. - * ### + * ## * - * @version $Id: CQLTermNode.java,v 1.5 2002-10-30 09:19:26 mike Exp $ + * @version $Id: CQLTermNode.java,v 1.6 2002-10-31 22:22:01 mike Exp $ */ public class CQLTermNode extends CQLNode { private String qualifier; @@ -31,9 +31,14 @@ public class CQLTermNode extends CQLNode { String toCQL() { String quotedQualifier = maybeQuote(qualifier); String quotedTerm = maybeQuote(term); + String res = quotedTerm; - // ### We don't always need spaces around `relation'. - return quotedQualifier + " " + relation.toCQL() + " " + quotedTerm; + if (!qualifier.equalsIgnoreCase("srw.serverChoice")) { + // ### We don't always need spaces around `relation'. + res = quotedQualifier + " " + relation.toCQL() + " " + quotedTerm; + } + + return res; } static String maybeQuote(String str) { @@ -48,7 +53,7 @@ public class CQLTermNode extends CQLNode { str.indexOf('/') != -1 || str.indexOf('(') != -1 || str.indexOf(')') != -1) { - str = '"' + replace(str, "\"", "\\\"") + '"'; + str = '"' + Utils.replaceString(str, "\"", "\\\"") + '"'; } return str; diff --git a/src/org/z3950/zing/cql/Makefile b/src/org/z3950/zing/cql/Makefile index 703de72..e91d911 100644 --- a/src/org/z3950/zing/cql/Makefile +++ b/src/org/z3950/zing/cql/Makefile @@ -1,8 +1,9 @@ -# $Id: Makefile,v 1.2 2002-10-30 09:19:26 mike Exp $ +# $Id: Makefile,v 1.3 2002-10-31 22:22:01 mike Exp $ -all: CQLNode.class CQLTermNode.class CQLBooleanNode.class \ +all: Utils.class \ + CQLNode.class CQLTermNode.class CQLBooleanNode.class \ CQLAndNode.class CQLOrNode.class CQLNotNode.class \ - CQLRelation.class \ + CQLRelation.class CQLProxNode.class ModifierSet.class \ CQLParser.class CQLLexer.class CQLParseException.class \ CQLGenerator.class ParameterMissingException.class diff --git a/src/org/z3950/zing/cql/ModifierSet.java b/src/org/z3950/zing/cql/ModifierSet.java new file mode 100644 index 0000000..18d8c9d --- /dev/null +++ b/src/org/z3950/zing/cql/ModifierSet.java @@ -0,0 +1,95 @@ +// $Id: ModifierSet.java,v 1.1 2002-10-31 22:22:01 mike Exp $ + +package org.z3950.zing.cql; +import java.util.Vector; +import java.lang.StringBuffer; + +/** + * Represents a base String and a set of modifier Strings. + *

+ * This class is used as a workhorse delegate by both CQLRelation and + * CQLProxNode - two functionally very separate classes that happen to + * require the same data structures and functionality. + * + * @version $Id: ModifierSet.java,v 1.1 2002-10-31 22:22:01 mike Exp $ + */ +public class ModifierSet { + String base; + Vector modifiers; + + public ModifierSet(String base) { + this.base = base; + modifiers = new Vector(); + } + + public void addModifier(String type, String value) { + Vector modifier = new Vector(); + modifier.add(type); + modifier.add(value); + modifiers.add(modifier); + } + + public Vector[] getModifiers() { + int n = modifiers.size(); + Vector[] res = new Vector[n]; + for (int i = 0; i < n; i++) { + res[i] = (Vector) modifiers.get(i); + } + + return res; + } + + public String toXCQL(int level, String topLevelElement) { + StringBuffer buf = new StringBuffer(); + buf.append (Utils.indent(level) + "<" + topLevelElement + ">\n" + + Utils.indent(level+1) + "" + Utils.xq(base) + + "\n"); + Vector[] mods = getModifiers(); + if (mods.length > 0) { + buf.append(Utils.indent(level+1) + "\n"); + for (int i = 0; i < mods.length; i++) { + Vector modifier = mods[i]; + buf.append(Utils.indent(level+2)). + append("\n"); + if (modifier.get(0) != null) + buf.append(Utils.indent(level+3)). + append(""). + append(Utils.xq((String) modifier.get(0))). + append("\n"); + buf.append(Utils.indent(level+3)); + buf.append(""). + append(Utils.xq((String) modifier.get(1))). + append("\n"); + buf.append(Utils.indent(level+2)). + append("\n"); + } + buf.append(Utils.indent(level+1) + "\n"); + } + buf.append(Utils.indent(level) + "\n"); + return buf.toString(); + } + + public String toCQL() { + StringBuffer buf = new StringBuffer(base); + Vector[] mods = getModifiers(); + for (int i = 0; i < mods.length; i++) { + buf.append("/").append(mods[i].get(1)); + } + + return buf.toString(); + } + + public static void main(String[] args) { + if (args.length < 1) { + System.err.println("Usage: ModifierSet [ ]..."); + System.exit(1); + } + + ModifierSet res = new ModifierSet(args[0]); + for (int i = 1; i < args.length; i += 2) { + res.addModifier(args[i], args[i+1]); + } + + System.out.println(res.toCQL()); + } +} -- 1.7.10.4