X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=lib%2FNet%2FZ3950%2FPQF.pm;h=3c78a7f69775ad2281431b3b377de1f5e6b7f9f6;hb=7cced0e025e85ed6b8cf92866a2580fd4b46d209;hp=92aae90a9b0bc623cab82aa56ae01b27c90ead36;hpb=5ae638fbecafdb1b5443301516c446f0a12fc04d;p=perl-pqf.git diff --git a/lib/Net/Z3950/PQF.pm b/lib/Net/Z3950/PQF.pm index 92aae90..3c78a7f 100644 --- a/lib/Net/Z3950/PQF.pm +++ b/lib/Net/Z3950/PQF.pm @@ -1,4 +1,4 @@ -# $Id: PQF.pm,v 1.2 2004-12-17 15:28:30 mike Exp $ +# $Id: PQF.pm,v 1.7 2004-12-23 10:24:12 mike Exp $ package Net::Z3950::PQF; @@ -8,7 +8,7 @@ use warnings; use Net::Z3950::PQF::Node; -our $VERSION = '0.02'; +our $VERSION = '0.03'; =head1 NAME @@ -28,9 +28,11 @@ This library provides a parser for PQF (Prefix Query Format), an ugly but precise string format for expressing Z39.50 Type-1 queries. This format is widely used behind the scenes of Z39.50 applications, and is also used extensively with test-harness programs such as the YAZ -command-line client, C. +command-line client, C. A few particularly misguided +souls have been known to type it by hand. -It is simple to use. Create a parser object, then pass PQF strings +Unlike PQF itself, this module +is simple to use. Create a parser object, then pass PQF strings into its C method to yield parse-trees. The trees are made up of nodes whose types are subclasses of C. @@ -59,6 +61,7 @@ sub new { my $class = shift(); return bless { + text => undef, errmsg => undef, }, $class; } @@ -68,9 +71,9 @@ sub new { $query = '@and @attr 1=1003 kernighan @attr 1=4 unix'; $node = $parser->parse($query); - if (!defined $node) + if (!defined $node) { die "parse($query) failed: " . $parser->errmsg(); - } + } Parses the PQF string provided as its argument. If an error occurs, then an undefined value is returned, and the error message can be @@ -80,20 +83,131 @@ of the parse tree is returned. $node2 = $parser->parse($query, "zthes"); $node3 = $parser->parse($query, "1.2.840.10003.3.13"); -A second argument may be provided, after the query itself. If it is +A second argument may be provided after the query itself. If it is provided, then it is taken to be either the name or the OID of a default attribute set, which attributes specified in the query belong -to if no alternative attribute set is explicitly specified. When this -second argument is absent, the default attribute set is BIB-1. +to if no alternative attribute set is explicitly specified within the +query. When this second argument is absent, the default attribute set +is BIB-1. =cut sub parse { my $this = shift(); - my($attrset) = @_; + my($text, $attrset) = @_; $attrset = "bib-1" if !defined $attrset; - die "parse($this) not yet implemented"; + $this->{text} = $text; + return $this->_parse($attrset, {}); +} + + +# PRIVATE to parse(); +# +# Underlying parse function. $attrset is the default attribute-set to +# use for attributes that are not specified with an explicit set, and +# $attrhash is hash of attributes (at most one per type per +# attribute-set) to be applied to all nodes below this point. The +# keys of this hash are of the form ":" and the values +# are the corresponding attribute values. +# +sub _parse { + my $this = shift(); + my($attrset, $attrhash) = @_; + + $this->{text} =~ s/^\s+//; + + ### This rather nasty hack for quoted terms doesn't recognised + # backslash-quoted embedded double quotes. + if ($this->{text} =~ s/^"(.*?)"//) { + return $this->_leaf('term', $1, $attrhash); + } + + # Also recognise multi-word terms enclosed in {curly braces} + if ($this->{text} =~ s/^{(.*?)}//) { + return $this->_leaf('term', $1, $attrhash); + } + + my $word = $this->_word(); + if ($word eq '@attrset') { + $attrset = $this->_word(); + return $this->_parse($attrset, $attrhash); + + } elsif ($word eq '@attr') { + $word = $this->_word(); + if ($word !~ /=/) { + $attrset = $word; + $word = $this->_word(); + } + my($type, $val) = ($word =~ /(.*)=(.*)/); + my %h = %$attrhash; + $h{"$attrset:$type"} = $val; + return $this->_parse($attrset, \%h); + + } elsif ($word eq '@and' || $word eq '@or' || $word eq '@not') { + my $sub1 = $this->_parse($attrset, $attrhash); + my $sub2 = $this->_parse($attrset, $attrhash); + if ($word eq '@and') { + return new Net::Z3950::PQF::AndNode($sub1, $sub2); + } elsif ($word eq '@or') { + return new Net::Z3950::PQF::OrNode($sub1, $sub2); + } elsif ($word eq '@not') { + return new Net::Z3950::PQF::NotNode($sub1, $sub2); + } else { + die "Houston, we have a problem"; + } + + } elsif ($word eq '@prox') { + return $this->_error("proximity not yet implemented"); + + } elsif ($word eq '@set') { + $word = $this->_word(); + return $this->_leaf('rset', $word, $attrhash); + } + + # It must be a bareword + return $this->_leaf('term', $word, $attrhash); +} + + +# PRIVATE to _parse(); +sub _word { + my $this = shift(); + + $this->{text} =~ s/^\s+//; + $this->{text} =~ s/^(\S+)//; + return $1; +} + + +# PRIVATE to _parse(); +sub _error { + my $this = shift(); + my (@msg) = @_; + + $this->{errmsg} = join("", @msg); + return undef; +} + + +# PRIVATE to _parse(); +sub _leaf { + my $this = shift(); + my($type, $word, $attrhash) = @_; + + my @attrs; + foreach my $key (sort keys %$attrhash) { + my($attrset, $type) = split /:/, $key; + push @attrs, [ $attrset, $type, $attrhash->{$key} ]; + } + + if ($type eq 'term') { + return new Net::Z3950::PQF::TermNode($word, @attrs); + } elsif ($type eq 'rset') { + return new Net::Z3950::PQF::RsetNode($word, @attrs); + } else { + die "_leaf() called with type='$type' (should be 'term' or 'rset')"; + } } @@ -101,6 +215,9 @@ sub parse { print $parser->errmsg(); +Returns the last error-message generated by a failed attempt to parse +a query. + =cut sub errmsg { @@ -111,6 +228,8 @@ sub errmsg { =head1 SEE ALSO +The C module. + The definition of the Type-1 query in the Z39.50 standard, the relevant section of which is on-line at http://www.loc.gov/z3950/agency/markup/09.html#3.7 @@ -128,7 +247,7 @@ Mike Taylor, Emike@indexdata.comE Copyright 2004 by Index Data ApS. This library is free software; you can redistribute it and/or modify -it under the same terms as Perl itself. +it under the same terms as Perl itself. =cut