X-Git-Url: http://git.indexdata.com/?p=simpleserver-moved-to-github.git;a=blobdiff_plain;f=SimpleServer.pm;h=bc8a9abdcec08a8469403c36d666dfbfc978287b;hp=2fbaf7fd7d5874f181a5cfeffa3d2734a91c7fbb;hb=9e52fe3b352579982d63ec09711cff6c4b49de30;hpb=18dfeef374769f1739e820f8b66b4d145c70112c diff --git a/SimpleServer.pm b/SimpleServer.pm index 2fbaf7f..bc8a9ab 100644 --- a/SimpleServer.pm +++ b/SimpleServer.pm @@ -1,5 +1,5 @@ ## -## Copyright (c) 2000-2004, Index Data. +## Copyright (c) 2000-2006, Index Data. ## ## Permission to use, copy, modify, distribute, and sell this software and ## its documentation, in whole or in part, for any purpose, is hereby granted, @@ -25,7 +25,7 @@ ## ## -## $Id: SimpleServer.pm,v 1.23 2006-03-24 01:21:15 mike Exp $ +## $Id: SimpleServer.pm,v 1.46 2007-09-25 09:02:14 mike Exp $ package Net::Z3950::SimpleServer; @@ -39,7 +39,7 @@ require AutoLoader; @ISA = qw(Exporter AutoLoader DynaLoader); @EXPORT = qw( ); -$VERSION = '1.00'; +$VERSION = '1.10'; bootstrap Net::Z3950::SimpleServer $VERSION; @@ -69,6 +69,13 @@ sub launch_server { my $self = shift; my @args = @_; + ### This modal internal interface, in which we set a bunch of + # globals and then call start_server(), is asking for + # trouble. Instead, we should just pass the $self object + # as a parameter into start_server(). 
+ if (defined($self->{GHANDLE})) { + set_ghandle($self->{GHANDLE}); + } if (defined($self->{INIT})) { set_init_handler($self->{INIT}); } @@ -83,22 +90,75 @@ sub launch_server { if (defined($self->{SCAN})) { set_scan_handler($self->{SCAN}); } + if (defined($self->{SORT})) { + set_sort_handler($self->{SORT}); + } + if (defined($self->{EXPLAIN})) { + set_explain_handler($self->{EXPLAIN}); + } + if (defined($self->{DELETE})) { + set_delete_handler($self->{DELETE}); + } start_server(@args); } # Register packages that we will use in translated RPNs +package Net::Z3950::RPN::Node; package Net::Z3950::APDU::Query; +our @ISA = qw(Net::Z3950::RPN::Node); package Net::Z3950::APDU::OID; package Net::Z3950::RPN::And; +our @ISA = qw(Net::Z3950::RPN::Node); package Net::Z3950::RPN::Or; +our @ISA = qw(Net::Z3950::RPN::Node); package Net::Z3950::RPN::AndNot; +our @ISA = qw(Net::Z3950::RPN::Node); package Net::Z3950::RPN::Term; +our @ISA = qw(Net::Z3950::RPN::Node); package Net::Z3950::RPN::RSID; +our @ISA = qw(Net::Z3950::RPN::Node); package Net::Z3950::RPN::Attributes; package Net::Z3950::RPN::Attribute; + +# Utility method for re-rendering Type-1 query back down to PQF +package Net::Z3950::RPN::Node; + +sub toPQF { + my $this = shift(); + my $class = ref $this; + + if ($class eq "Net::Z3950::APDU::Query") { + my $res = ""; + my $set = $this->{attributeSet}; + $res .= "\@attrset $set " if defined $set; + return $res . $this->{query}->toPQF(); + } elsif ($class eq "Net::Z3950::RPN::Or") { + return '@or ' . $this->[0]->toPQF() . ' ' . $this->[1]->toPQF(); + } elsif ($class eq "Net::Z3950::RPN::And") { + return '@and ' . $this->[0]->toPQF() . ' ' . $this->[1]->toPQF(); + } elsif ($class eq "Net::Z3950::RPN::AndNot") { + return '@not ' . $this->[0]->toPQF() . ' ' . $this->[1]->toPQF(); + } elsif ($class eq "Net::Z3950::RPN::RSID") { + return '@set ' . 
$this->{id}; + } elsif ($class ne "Net::Z3950::RPN::Term") { + die "unknown PQF node-type '$class'"; + } + + my $res = ""; + foreach my $attr (@{ $this->{attributes} }) { + $res .= "\@attr "; + my $set = $attr->{attributeSet}; + $res .= "$set " if defined $set; + $res .= $attr->{attributeType} . "=" . $attr->{attributeValue} . " "; + } + + return $res . $this->{term}; +} + + # Must revert to original package for Autoloader's benefit package Net::Z3950::SimpleServer; @@ -145,15 +205,14 @@ Net::Z3950::SimpleServer - Simple Perl API for building Z39.50 servers. } } - ## Register custom event handlers: + my $z = new Net::Z3950::SimpleServer(GHANDLE => $someObject, + INIT => \&my_init_handler, + CLOSE => \&my_close_handler, + SEARCH => \&my_search_handler, + FETCH => \&my_fetch_handler); - my $z = new Net::Z3950::SimpleServer( INIT => \&my_init_handler, - CLOSE => \&my_close_handler, - SEARCH => \&my_search_handler, - FETCH => \&my_fetch_handler); ## Launch server: - $z->launch_server("ztest.pl", @ARGV); =head1 DESCRIPTION @@ -209,7 +268,15 @@ means of the SimpleServer object constructor SEARCH => \&my_search_handler, PRESENT => \&my_present_handler, SCAN => \&my_scan_handler, - FETCH => \&my_fetch_handler); + FETCH => \&my_fetch_handler, + EXPLAIN => \&my_explain_handler, + DELETE => \&my_delete_handler, + SORT => \&my_sort_handler); + +In addition, the arguments to the constructor may include GHANDLE, a +global handle which is made available to each invocation of every +callback function. This is typically a reference to either a hash or +an object. 
If you want your SimpleServer to start a thread (threaded mode) to handle each incoming Z39.50 request instead of forking a process @@ -253,6 +320,7 @@ The argument hash passed to the init handler has the form $args = { ## Response parameters: + PEER_NAME => "", ## Name or IP address of connecting client IMP_ID => "", ## Z39.50 Implementation ID IMP_NAME => "", ## Z39.50 Implementation name IMP_VER => "", ## Z39.50 Implementation version @@ -262,6 +330,7 @@ The argument hash passed to the init handler has the form ## this member contains user name PASS => "yyy" ## Under same conditions, this member ## contains the password in clear text + GHANDLE => $obj ## Global handler specified at creation HANDLE => undef ## Handler of Perl data structure }; @@ -291,6 +360,7 @@ mous hash. The structure is the following: $args = { ## Request parameters: + GHANDLE => $obj ## Global handler specified at creation HANDLE => ref, ## Your session reference. SETNAME => "id", ## ID of the result set REPL_SET => 0, ## Replace set if already existing? @@ -398,9 +468,6 @@ of the result-set is in the C element. =back -(I guess I should make a superclass C and make -all of these subclasses of it. Not done that yet, but will do one day.) - =back =over 4 @@ -449,7 +516,7 @@ a ``relation'' attribute, etc. =item C -An integer indicating the value of the attribute - for example, under +An integer or string indicating the value of the attribute - for example, under BIB-1, if the attribute type is 1, then value 4 indictates a title search and 7 indictates an ISBN search; but if the attribute type is 2, then value 4 indicates a ``greater than or equal'' search, and 102 @@ -459,7 +526,12 @@ indicates a relevance match. =back -Note that, at the moment, none of these classes have any methods at +All of these classes except C and C are +subclasses of the abstract class C. That class +has a single method, C, which may be used to turn an RPN +tree, or part of one, back into a textual prefix query. 
+ +Note that, apart from C<toPQF()>, none of these classes have any methods at all: the blessing into classes is largely just a documentation thing so that, for example, if you do @@ -500,6 +572,7 @@ The informations exchanged between client and present handle are: $args = { ## Client/server request: + GHANDLE => $obj ## Global handler specified at creation HANDLE => ref, ## Reference to datastructure SETNAME => "id", ## Result set ID START => xxx, ## Start position @@ -526,11 +599,13 @@ The parameters exchanged between the server and the fetch handler are $args = { ## Client/server request: + GHANDLE => $obj ## Global handler specified at creation HANDLE => ref ## Reference to data structure SETNAME => "id" ## ID of the requested result set OFFSET => nnn ## Record offset number REQ_FORM => "n.m.k.l"## Client requested format OID COMP => "xyz" ## Formatting instructions + SCHEMA => "abc" ## Requested schema, if any ## Handler response: @@ -541,6 +616,7 @@ The parameters exchanged between the server and the fetch handler are ERR_STR => "" ## Error string SUR_FLAG => 0 ## Surrogate diagnostic flag REP_FORM => "n.m.k.l"## Provided format OID + SCHEMA => "abc" ## Provided schema, if any }; The REP_FORM value has by default the REQ_FORM value but can be set to @@ -572,8 +648,13 @@ an index of a book, you always find something! The parameters exchanged are $args = { ## Client request - HANDLE => $ref ## Reference to data structure + GHANDLE => $obj, ## Global handler specified at creation + HANDLE => $ref, ## Reference to data structure + DATABASES => ["xxx"], ## Reference to a list of data- + ## bases to search TERM => 'start', ## The start term + RPN => $obj, ## Reference to a Net::Z3950::RPN::Term + NUMBER => xx, ## Number of requested terms POS => yy, ## Position of starting point ## within returned list @@ -606,20 +687,31 @@ should point at a data structure of this kind, ... 
]; -The $status flag should be assigned one of two values: +The $status flag is only meaningful after a successful scan, and +should be assigned one of two values: - Net::Z3950::SimpleServer::ScanSuccess On success (default) - Net::Z3950::SimpleServer::ScanPartial Less terms returned than requested + Net::Z3950::SimpleServer::ScanSuccess Full success (default) + Net::Z3950::SimpleServer::ScanPartial Fewer terms returned than requested The STEP member contains the requested number of entries in the term-list between two adjacent entries in the response. +A better alternative to the TERM member is the RPN +member, which is a reference to a Net::Z3950::RPN::Term object +representing the scan clause. The structure of that object is the +same as for Term objects included as part of the RPN tree passed to +search handlers. This is more useful than the simple TERM because it +includes attributes (e.g. access points associated with the term), +which are discarded by the TERM element. + =head2 Close handler -The argument hash recieved by the close handler has one element only: +The argument hash received by the close handler has two elements only: $args = { ## Server provides: + + GHANDLE => $obj ## Global handler specified at creation HANDLE => ref ## Reference to data structure }; @@ -627,11 +719,172 @@ What ever data structure the HANDLE value points at goes out of scope after this call. If you need to close down a connection to your server or something similar, this is the place to do it. +=head2 Delete handler + +The argument hash received by the delete handler has the following elements: + + $args = { + ## Client request: + GHANDLE => $obj, ## Global handler specified at creation + HANDLE => ref, ## Reference to data structure + SETNAME => "id", ## Result set ID + + ## Server response: + STATUS => 0 ## Deletion status + }; + +The SETNAME element of the argument hash may or may not be defined. 
+If it is, then SETNAME is the name of a result set to be deleted; if +not, then all result-sets associated with the current session should +be deleted. In either case, the callback function should report on +success or failure by setting the STATUS element either to zero, on +success, or to an integer from 1 to 10, to indicate one of the ten +possible failure codes described in section 3.2.4.1.4 of the Z39.50 +standard -- see +http://www.loc.gov/z3950/agency/markup/05.html#Delete-list-statuses1 + +=head2 Sort handler + +The argument hash received by the sort handler has the following elements: + + $args = { + ## Client request: + GHANDLE => $obj, ## Global handler specified at creation + HANDLE => ref, ## Reference to data structure + INPUT => [ a, b ... ], ## Names of result-sets to sort + OUTPUT => "name", ## Name of result-set to sort into + SEQUENCE ## Sort specification: see below + + ## Server response: + STATUS => 0, ## Success, Partial or Failure + ERR_CODE => 0, ## Error code + ERR_STR => '', ## Diagnostic message + + }; + +The SEQUENCE element is a reference to an array, each element of which +is a hash representing a sort key. Each hash contains the following +elements: + +=over 4 + +=item RELATION + +0 for an ascending sort, 1 for descending, 3 for ascending by +frequency, or 4 for descending by frequency. + +=item CASE + +0 for a case-sensitive sort, 1 for case-insensitive + +=item MISSING + +How to respond if one or more records in the set to be sorted are +missing the fields indicated in the sort specification. 1 to abort +the sort, 2 to use a "null value", 3 if a value is provided to use in +place of the missing data (although in the last case, the actual +value to use is currently not made available, so this is useless). 
+ +=back + +And one or other of the following: + +=over 4 + +=item SORTFIELD + +A string indicating the field to be sorted, which the server may +interpret as it sees fit (presumably by an out-of-band agreement with +the client). + +=item ELEMENTSPEC_TYPE and ELEMENTSPEC_VALUE + +I have no idea what this is. + +=item ATTRSET and SORT_ATTR + +ATTRSET is the attribute set from which the attributes are taken, and +SORT_ATTR is a reference to an array containing the attributes +themselves. Each attribute is represented by (are you following this +carefully?) yet another hash, this one containing the elements +ATTR_TYPE and ATTR_VALUE: for example, type=1 and value=4 in the BIB-1 +attribute set would indicate access-point 4 which is title, so that a +sort of title is requested. + +=back + +Precisely why all of the above is so is not clear, but goes some way +to explain why, in the Z39.50 world, the developers of the standard +are not so much worshipped as blamed. + +The backend function should set STATUS to 0 on success, 1 for "partial +success" (don't ask) or 2 on failure, in which case ERR_CODE and +ERR_STR should be set. + +=head2 Support for SRU and SRW + +Since release 1.0, SimpleServer includes support for serving the SRU +and SRW protocols as well as Z39.50. These ``web-friendly'' protocols +enable similar functionality to that of Z39.50, but by means of rich +URLs in the case of SRU, and a SOAP-based web-service in the case of +SRW. These protocols are described at +http://www.loc.gov/sru + +In order to serve these protocols from a SimpleServer-based +application, it is necessary to launch the application with a YAZ +Generic Frontend Server (GFS) configuration file, which can be +specified using the command-line argument C<-f> I<filename>. A +minimal configuration file looks like this: + + <yazgfs> + <server> + <cql2rpn>pqf.properties</cql2rpn> + </server> + </yazgfs> + +This file specifies only that C<pqf.properties> should be used to +translate the CQL queries of SRU and SRW into corresponding Z39.50 +Type-1 queries. 
For more information about YAZ GFS configuration, +including how to specify an Explain record, see the I +section of the YAZ manual at +http://indexdata.com/yaz/doc/server.vhosts.tkl + +The mapping of CQL queries into Z39.50 Type-1 queries is specified by +a file that indicates which BIB-1 attributes should be generated for +each CQL index, relation, modifier, etc. A typical section of this +file looks like this: + + index.dc.title = 1=4 + index.dc.subject = 1=21 + index.dc.creator = 1=1003 + relation.< = 2=1 + relation.le = 2=2 + +This file specifies the BIB-1 access points (type=1) for the Dublin +Core indexes C<title>, C<subject> and C<creator>, and the BIB-1 +relations (type=2) corresponding to the CQL relations C<E<lt>> and +C<E<lt>=>. For more information about the format of this file, see +the I<CQL> section of the YAZ manual at +http://indexdata.com/yaz/doc/tools.tkl#tools.cql + +The YAZ distribution includes a sample CQL-to-PQF mapping configuration +file called C<pqf.properties>; this is sufficient for many +applications, and a good base to work from for most others. + +If a SimpleServer-based application is run without this SRU-specific +configuration, it can still serve SRU; however, CQL queries will not +be translated, but passed straight through to the search-handler +function, as the C<CQL> member of the parameters hash. It is then the +responsibility of the back-end application to parse and handle the CQL +query, which is most easily done using Ed Summers' fine C<CQL::Parser> +module, available from CPAN at +http://search.cpan.org/~esummers/CQL-Parser/ + =head1 AUTHORS -Anders Sønderberg (sondberg@indexdata.dk) and Sebastian Hammer -(quinn@indexdata.dk). Substantial contributions made by Mike Taylor -(mike@miketaylor.org.uk). +Anders Sønderberg (sondberg@indexdata.dk), +Sebastian Hammer (quinn@indexdata.dk), +Mike Taylor (indexdata.com). =head1 SEE ALSO