-# $Id: IRSpy.pm,v 1.30 2006-10-13 13:41:56 sondberg Exp $
+# $Id: IRSpy.pm,v 1.80 2007-03-30 09:50:55 mike Exp $
package ZOOM::IRSpy;
use strict;
use warnings;
-use Data::Dumper; # For debugging only
+use Data::Dumper; # For debugging only
+use File::Basename;
+use XML::LibXSLT;
+use XML::LibXML;
+use XML::LibXML::XPathContext;
use ZOOM;
use Net::Z3950::ZOOM 1.13; # For the ZOOM version-check only
use ZOOM::IRSpy::Node;
use ZOOM::IRSpy::Connection;
-use ZOOM::IRSpy::Record;
+use ZOOM::IRSpy::Stats;
+use ZOOM::IRSpy::Utils qw(cql_target render_record irspy_xpath_context);
our @ISA = qw();
our $VERSION = '0.02';
-our $irspy_ns = 'http://indexdata.com/irspy/1.0';
+our $irspy_to_zeerex_xsl = dirname(__FILE__) . '/../../xsl/irspy2zeerex.xsl';
# Enumeration for callback functions to return
sub TASK_DONE { 18 } # Task is complete, next task should begin
sub TEST_GOOD { 8 } # Whole test is complete, and succeeded
sub TEST_BAD { 31 } # Whole test is complete, and failed
+sub TEST_SKIPPED { 12 } # Test couldn't be run
package ZOOM::IRSpy;
use ZOOM::IRSpy;
$spy = new ZOOM::IRSpy("target/string/for/irspy/database");
- print $spy->report_status();
+ $spy->targets(@targets);
+ $spy->initialise();
+ $res = $spy->check("Main");
=head1 DESCRIPTION
sub new {
my $class = shift();
- my($dbname, $user, $password) = @_;
+ my($dbname, $user, $password, $activeSetSize) = @_;
my @options;
push @options, (user => $user, password => $password)
my $conn = new ZOOM::Connection($dbname, 0, @options)
or die "$0: can't connection to IRSpy database 'dbname'";
+ my $xslt = new XML::LibXSLT;
+
+ $xslt->register_function($ZOOM::IRSpy::Utils::IRSPY_NS, 'strcmp',
+ \&ZOOM::IRSpy::Utils::xslt_strcmp);
+
+ my $libxml = new XML::LibXML;
+ my $xsl_doc = $libxml->parse_file($irspy_to_zeerex_xsl);
+ my $irspy_to_zeerex_style = $xslt->parse_stylesheet($xsl_doc);
+
my $this = bless {
conn => $conn,
- allrecords => 1, # unless overridden by targets()
- query => undef, # filled in later
- targets => undef, # filled in later
- connections => undef, # filled in later
- tests => [], # stack of tests currently being executed
+ query => "cql.allRecords=1", # unless overridden
+ targets => undef, # Filled in later if targets() is
+ # called; used only to keep state from
+ # targets() until initialise() is
+ # called.
+ connections => undef, # Filled in by initialise()
+ queue => undef, # Filled in by initialise()
+ libxml => $libxml,
+ irspy_to_zeerex_style => $irspy_to_zeerex_style,
+ tests => undef, # Tree of tests to be executed
+ activeSetSize => defined $activeSetSize ? $activeSetSize : 10,
}, $class;
$this->log("irspy", "starting up with database '$dbname'");
}
+sub find_targets {
+ my $this = shift();
+ my($query) = @_;
+
+ $this->{query} = $query;
+}
+
+
# Explicitly nominate a set of targets to check, overriding the
# default which is to re-check everything in the database. Each
# target already in the database results in the existing record being
$this->log("irspy", "setting explicit list of targets ",
join(", ", map { "'$_'" } @targets));
- $this->{allrecords} = 0;
my @qlist;
foreach my $target (@targets) {
my($host, $port, $db, $newtarget) = _parse_target_string($target);
$this->log("irspy_debug", "rewriting '$target' to '$newtarget'");
$target = $newtarget; # This is written through the ref
}
- push @qlist, (qq[(host="$host" and port="$port" and path="$db")]);
+ push @qlist, cql_target($host, $port, $db);
}
$this->{targets} = \@targets;
}
-# There are two cases.
-#
-# 1. A specific set of targets is nominated on the command line.
-# - Records must be fetched for those targets that are in the DB
-# - New, empty records must be made for those that are not.
-# - Updated records written to the DB may or may not be new.
-#
-# 2. All records in the database are to be checked.
-# - Records must be fetched for all targets in the DB
-# - Updated records written to the DB may not be new.
-#
-# That's all -- what could be simpler?
+# Records must be fetched for all records satisfying $this->{query} If
+# $this->{targets} is already set (i.e. a specific list of targets to
+# check was specified by a call to targets()), then new, empty records
+# will be made for any targets that are not already in the database.
#
sub initialise {
my $this = shift();
- my %target2record;
- if ($this->{allrecords}) {
- # We need to check on every target in the database, which
- # means we need to do a "find all". According to the BIB-1
- # semantics document at
- # http://www.loc.gov/z3950/agency/bib1.html
- # the query
- # @attr 2=103 @attr 1=1035 x
- # should find all records, but it seems that Zebra doesn't
- # support this. Furthermore, when using the "alvis" filter
- # (as we do for IRSpy) it doesn't support the use of any BIB-1
- # access point -- not even 1035 "everywhere" -- so instead we
- # hack together a search that we know will find all records.
- $this->{query} = "port=?*";
+ my @targets;
+ my $targets = $this->{targets};
+ if (defined $targets) {
+ @targets = @$targets;
+ delete $this->{targets};
} else {
- # Prepopulate the target map with nulls so that after we fill
- # in what we can from the database query, we know which target
- # IDs we need new records for.
- foreach my $target (@{ $this->{targets} }) {
- $target2record{lc($target)} = undef;
- }
- }
+ my $rs = $this->{conn}->search(new ZOOM::Query::CQL($this->{query}));
+ $this->log("irspy", "'", $this->{query}, "' found ",
+ $rs->size(), " target records");
+ delete $this->{query};
- $this->log("irspy_debug", "query '", $this->{query}, "'");
- my $rs = $this->{conn}->search(new ZOOM::Query::CQL($this->{query}));
- delete $this->{query}; # No longer needed at all
- $this->log("irspy_debug", "found ", $rs->size(), " target records");
- foreach my $i (1 .. $rs->size()) {
- my $target = _render_record($rs, $i-1, "id");
- my $zeerex = _render_record($rs, $i-1, "zeerex");
- #print STDERR "making '$target' record with '$zeerex'\n";
- $target2record{lc($target)} =
- new ZOOM::IRSpy::Record($this, $target, $zeerex);
- push @{ $this->{targets} }, $target
- if $this->{allrecords};
- }
-
- # Make records for targets not previously in the database
- foreach my $target (keys %target2record) {
- my $record = $target2record{$target};
- if (!defined $record) {
- $this->log("irspy_debug", "made new record for '$target'");
- $target2record{$target} = new ZOOM::IRSpy::Record($this, $target);
- } else {
- $this->log("irspy_debug", "using existing record for '$target'");
+ foreach my $i (1 .. $rs->size()) {
+ push @targets, render_record($rs, $i-1, "id");
}
}
+ my $n = $this->{activeSetSize};
+ $n = @targets if $n == 0 || $n > @targets;
+
my @connections;
- foreach my $target (@{ $this->{targets} }) {
- my $conn = create ZOOM::IRSpy::Connection($this, async => 1);
- $conn->option(host => $target);
- my $record = delete $target2record{lc($target)};
- $conn->record($record);
- push @connections, $conn;
+ foreach my $i (1..$n) {
+ push @connections, create ZOOM::IRSpy::Connection($this,
+ shift @targets,
+ async => 1);
}
- die("remaining target2record = { " .
- join(", ", map { "$_ ->'" . $target2record{$_}. "'" }
- sort keys %target2record) . " }")
- if %target2record;
$this->{connections} = \@connections;
- delete $this->{targets}; # The information is now in {connections}
+ $this->{queue} = \@targets;
}
-sub _render_record {
- my($rs, $which, $elementSetName) = @_;
-
- # There is a slight race condition here on the element-set name,
- # but it shouldn't be a problem as this is (currently) only called
- # from parts of the program that run single-threaded.
- my $old = $rs->option(elementSetName => $elementSetName);
- my $rec = $rs->record($which);
- $rs->option(elementSetName => $old);
+sub _irspy_to_zeerex {
+ my $this = shift();
+ my($conn, $save_xml) = @_;
+ my $irspy_doc = $conn->record()->{zeerex}->ownerDocument;
+
+ if ($save_xml) {
+ unlink('/tmp/irspy_orig.xml');
+ open FH, '>/tmp/irspy_orig.xml'
+ or die "can't write irspy_orig.xml: $!";
+ print FH $irspy_doc->toString();
+ close FH;
+ }
+ my %params = ();
+ my $result = $this->{irspy_to_zeerex_style}->transform($irspy_doc, %params);
+ if ($save_xml) {
+ unlink('/tmp/irspy_transformed.xml');
+ open FH, '>/tmp/irspy_transformed.xml'
+ or die "can't write irspy_transformed.xml: $!";
+ print FH $result->toString();
+ close FH;
+ }
- return $rec->render();
+ return $result->documentElement();
}
-sub _rewrite_records {
+sub _rewrite_record {
my $this = shift();
+ my($conn) = @_;
- # Write modified records back to database
- foreach my $conn (@{ $this->{connections} }) {
- my $rec = $conn->record();
- my $p = $this->{conn}->package();
- $p->option(action => "specialUpdate");
- my $xml = $rec->{zeerex}->toString();
- $p->option(record => $xml);
+ $conn->log("irspy", "rewriting XML record");
+ my $rec = $this->_irspy_to_zeerex($conn, $ENV{IRSPY_SAVE_XML});
+
+ # Since IRSpy can run for a long time between writes back to the
+ # database, it's quite possible for the server to have closed the
+ # connection as idle. So re-establish it if necessary.
+ $this->{conn}->connect($conn->option("host"));
+
+ _really_rewrite_record($this->{conn}, $rec);
+ $conn->log("irspy", "rewrote XML record");
+}
+
+
+sub _really_rewrite_record {
+ my($conn, $rec, $oldid) = @_;
+
+ my $p = $conn->package();
+ $p->option(action => "specialUpdate");
+ my $xml = $rec->toString();
+ $p->option(record => $xml);
+ $p->send("update");
+ $p->destroy();
+
+ # This is the expression in the ID-making stylesheet
+ # ../../zebra/zeerex2id.xsl
+ my $xc = irspy_xpath_context($rec);
+ my $id = $xc->find("concat(e:serverInfo/e:host, ':',
+ e:serverInfo/e:port, '/',
+ e:serverInfo/e:database)");
+ if (defined $oldid && $id ne $oldid) {
+ # Delete old record;
+ warn "IDs differ (old='$oldid' new='$id')";
+ my $p = $conn->package();
+ $p->option(action => "recordDelete");
+ $p->option(recordIdOpaque => $oldid);
+ $p->option(record => "<dummy/>"); # Work around Zebra bug
$p->send("update");
$p->destroy();
+ }
- $p = $this->{conn}->package();
- $p->send("commit");
- $p->destroy();
- if (0) {
- $xml =~ s/&/&/g;
- $xml =~ s/</</g;
- $xml =~ s/>/>/g;
- print "Updated with xml=<br/>\n<pre>$xml</pre>\n";
- }
+ $p = $conn->package();
+ $p->send("commit");
+ $p->destroy();
+ if (0) {
+ $xml =~ s/&/&/g;
+ $xml =~ s/</</g;
+ $xml =~ s/>/>/g;
+ print "Updated $conn with xml=<br/>\n<pre>$xml</pre>\n";
}
}
$tname = "Main" if !defined $tname;
$this->{tree} = $this->_gather_tests($tname)
- or die "No tests defined";
+ or die "No tests defined for '$tname'";
+ $this->{tree}->resolve();
#$this->{tree}->print(0);
my $nskipped = 0;
my @conn = @{ $this->{connections} };
+ my $nruns = 0;
+ ROUND_AND_ROUND_WE_GO:
while (1) {
my @copy_conn = @conn; # avoid alias problems after splice()
+ my $nconn = scalar(@copy_conn);
foreach my $i0 (0 .. $#copy_conn) {
my $conn = $copy_conn[$i0];
- #print "connection $i0 of ", scalar(@copy_conn), " from ", scalar(@conn), " is $conn\n";
+ #print "connection $i0 of $nconn/", scalar(@conn), " is $conn\n";
+ next if !defined $conn;
if (!$conn->current_task()) {
if (!$conn->next_task()) {
# Out of tasks: we need a new test
if (!defined $address) {
$nextaddr = "";
} else {
- $this->log("irspy_test", "checking for next test after '$address'");
+ $conn->log("irspy_test",
+ "checking for next test after '$address'");
$nextaddr = $this->_next_test($address);
}
if (!defined $nextaddr) {
$conn->log("irspy", "has no more tests: removing");
- splice @conn, $i0, 1;
+ $this->_rewrite_record($conn);
+ $conn->option(rewrote_record => 1);
+ if (@{ $this->{queue} } == 0) {
+ # Do not destroy: we need this for later sanity checks
+ splice @conn, $i0, 1;
+ } else {
+ $conn->destroy();
+ $conn[$i0] = create
+ ZOOM::IRSpy::Connection($this,
+ shift @{ $this->{queue} }, async => 1);
+ $conn[$i0]->option(current_test_address => "");
+ $conn[$i0]->log("irspy", "entering active pool - ",
+ scalar(@{ $this->{queue} }),
+ " targets remain in queue");
+ }
next;
}
or die "invalid nextaddr '$nextaddr'";
$conn->option(current_test_address => $nextaddr);
my $tname = $node->name();
- $conn->log("irspy_test", "starting test '$nextaddr' = $tname");
+ $conn->log("irspy_test",
+ "starting test '$nextaddr' = $tname");
my $tasks = $conn->tasks();
my $oldcount = @$tasks;
"ZOOM::IRSpy::Test::$tname"->start($conn);
# Prepare to start the first of the newly added tasks
$conn->next_task($tasks->[$oldcount]);
} else {
- $conn->log("irspy_task", "no tasks added by new test $tname");
+ $conn->log("irspy_task",
+ "no tasks added by new test $tname");
goto NEXT_TEST;
}
}
my $task = $conn->next_task();
die "no next task queued for $conn" if !defined $task;
- $conn->log("irspy_task", "starting task $task");
+ $conn->log("irspy_task", "preparing task $task");
$conn->next_task(0);
$conn->current_task($task);
$task->run();
}
-
- # Do we need to test $conn->is_idle()? I don't think so!
}
+ NEXT_EVENT:
my $i0 = ZOOM::event(\@conn);
- $this->log("irspy_event", "ZOOM_event(", scalar(@conn), " connections) = $i0");
- last if $i0 == 0 || $i0 == -3; # no events or no connections
+ $this->log("irspy_event",
+ "ZOOM_event(", scalar(@conn), " connections) = $i0");
+ if ($i0 < 1) {
+ my %messages = (
+ 0 => "no events remain",
+ -1 => "ZOOM::event() argument not a reference",
+ -2 => "ZOOM::event() reference not an array",
+ -3 => "no connections remain",
+ -4 => "too many connections for ZOOM::event()",
+ );
+ my $message = $messages{$i0} || "ZOOM::event() returned $i0";
+ $this->log("irspy", $message);
+ last;
+ }
+
my $conn = $conn[$i0-1];
my $ev = $conn->last_event();
my $evstr = ZOOM::event_str($ev);
$conn->log("irspy_event", "event $ev ($evstr)");
+ goto NEXT_EVENT if $ev != ZOOM::Event::ZEND;
my $task = $conn->current_task();
die "$conn has no current task for event $ev ($evstr)" if !$task;
- eval { $conn->_check() };
- if ($@ &&
- ($ev == ZOOM::Event::RECV_DATA ||
- $ev == ZOOM::Event::RECV_APDU ||
- $ev == ZOOM::Event::ZEND)) {
- # An error in, say, a search response, becomes visible to
- # ZOOM before the Receive Data event is sent and persists
- # until after the End, which means that successive events
- # each report the same error. So we just ignore errors on
- # "unimportant" events. ### But this doesn't work for,
- # say, a Connection Refused, as the only event that shows
- # us this error is the End.
- $conn->log("irspy_event", "ignoring error ",
- "on event $ev ($evstr): $@");
- next;
- }
my $res;
- if ($@) {
+ eval { $conn->check() };
+ if ($@ && ref $@ && $@->isa("ZOOM::Exception")) {
my $sub = $task->{cb}->{exception};
die $@ if !defined $sub;
$res = &$sub($conn, $task, $task->udata(), $@);
+ } elsif ($@) {
+ die "Unexpected non-ZOOM exception: " . ref($@) . " ($@)";
} else {
my $sub = $task->{cb}->{$ev};
if (!defined $sub) {
} elsif ($res == ZOOM::IRSpy::Status::TEST_GOOD ||
$res == ZOOM::IRSpy::Status::TEST_BAD) {
my $x = ($res == ZOOM::IRSpy::Status::TEST_GOOD) ? "good" : "bad";
+ $conn->log("irspy_task", "test ended during task $task ($x)");
$conn->log("irspy_test", "test completed ($x)");
$conn->current_task(0);
$conn->next_task(0);
if ($res == ZOOM::IRSpy::Status::TEST_BAD) {
- ### Should skip over remaining sibling tests if TEST_BAD
- ### Should count the number of skipped siblings
- $nskipped += 1;
+ my $address = $conn->option('current_test_address');
+ $conn->log("irspy", "top-level test failed!")
+ if $address eq "";
+ my $node = $this->{tree}->select($address);
+ my $skipcount = 0;
+ while (defined $node->next() &&
+ length($node->next()->address()) >= length($address)) {
+ $conn->log("irspy_debug", "skipping from '",
+ $node->address(), "' to '",
+ $node->next()->address(), "'");
+ $node = $node->next();
+ $skipcount++;
+ }
+
+ $conn->option(current_test_address => $node->address());
+ $conn->log("irspy_test", "skipped $skipcount tests");
+ $nskipped += $skipcount;
}
+
+ } elsif ($res == ZOOM::IRSpy::Status::TEST_SKIPPED) {
+ $conn->log("irspy_test", "test skipped during task $task");
+ $conn->current_task(0);
+ $conn->next_task(0);
+ $nskipped++;
+
+ } else {
+ die "unknown callback return-value '$res'";
}
}
$this->log("irspy", "exiting main loop");
- #$this->_rewrite_records();
+ # Sanity checks: none of the following should ever happen
+ my $finished = 1;
+ $this->log("irspy", "performing end-of-run sanity-checks");
+ foreach my $conn (@conn) {
+ my $test = $conn->option("current_test_address");
+ my $next = $this->_next_test($test);
+ if (defined $next) {
+ $this->log("irspy",
+ "$conn (in test '$test') has queued test '$next'");
+ $finished = 0;
+ }
+ if (my $task = $conn->current_task()) {
+ $this->log("irspy", "$conn still has an active task $task");
+ $finished = 0;
+ }
+ if (my $task = $conn->next_task()) {
+ $this->log("irspy", "$conn still has a queued task $task");
+ $finished = 0;
+ }
+ if (!$conn->is_idle()) {
+ $this->log("irspy",
+ "$conn still has ZOOM-C level tasks queued: see below");
+ $finished = 0;
+ }
+ my $ev = $conn->peek_event();
+ if ($ev != 0 && $ev != ZOOM::Event::ZEND) {
+ my $evstr = ZOOM::event_str($ev);
+ $this->log("irspy", "$conn has event $ev ($evstr) waiting");
+ $finished = 0;
+ }
+ if (!$conn->option("rewrote_record")) {
+ $this->log("irspy", "$conn did not rewrite its ZeeRex record");
+ $finished = 0;
+ }
+ }
+
+ # This really shouldn't be necessary, and in practice it rarely
+ # helps, but it's belt and braces. (For now, we don't do this
+ # hence the zero in the $nruns check).
+ if (!$finished) {
+ if (++$nruns < 0) {
+ $this->log("irspy", "back into main loop, ${nruns}th time");
+ goto ROUND_AND_ROUND_WE_GO;
+ } else {
+ $this->log("irspy", "bailing after $nruns main-loop runs");
+ }
+ }
+
+ # This shouldn't happen emit anything either:
+ while ((my $i1 = ZOOM::event(\@conn)) > 0) {
+ my $conn = $conn[$i1-1];
+ my $ev = $conn->last_event();
+ my $evstr = ZOOM::event_str($ev);
+ $this->log("irspy",
+ "$conn still has ZOOM-C level task queued: $ev ($evstr)")
+ if $ev != ZOOM::Event::ZEND;
+ }
+
return $nskipped;
}
+# Exactly equivalent to ZOOM::event() except that it is tolerant to
+# undefined values in the array being passed in.
+#
+sub __UNUSED_tolerant_ZOOM_event {
+ my($connref) = @_;
+
+ my(@conn, @map);
+ foreach my $i (0 .. @$connref-1) {
+ my $conn = $connref->[$i];
+ if (defined $conn) {
+ push @conn, $conn;
+ push @map, $i;
+ }
+ }
+
+ my $res = ZOOM::event(\@conn);
+ return $res if $res <= 0;
+ my $res2 = $map[$res-1] + 1;
+ print STDERR "*** tolerant_ZOOM_event() returns $res->$res2\n";
+ return $res2;
+}
+
+
sub _gather_tests {
my $this = shift();
my($tname, @ancestors) = @_;
join(" -> ", @ancestors, $tname))
if grep { $_ eq $tname } @ancestors;
+ my $slashSeperatedTname = $tname;
+ $slashSeperatedTname =~ s/::/\//g;
+ my $fullName = "ZOOM/IRSpy/Test/$slashSeperatedTname.pm";
+
eval {
- my $slashSeperatedTname = $tname;
- $slashSeperatedTname =~ s/::/\//g;
- require "ZOOM/IRSpy/Test/$slashSeperatedTname.pm";
+ require $fullName;
+ $this->log("irspy", "successfully required '$fullName'");
}; if ($@) {
+ $this->log("irspy", "couldn't require '$fullName': $@");
$this->log("warn", "can't load test '$tname': skipping",
$@ =~ /^Can.t locate/ ? () : " ($@)");
return undef;
}
+# These next three should arguably be Node methods
sub _next_test {
my $this = shift();
my($address, $omit_child) = @_;
}
+sub _last_sibling_test {
+ my $this = shift();
+ my($address) = @_;
+
+ return undef
+ if !defined $this->_next_sibling_test($address);
+
+ my $nskipped = 0;
+ while (1) {
+ my $maybe = $this->_next_sibling_test($address);
+ last if !defined $maybe;
+ $nskipped++;
+ $address = $maybe;
+ $this->log("irspy", "skipping $nskipped tests to '$address'");
+ }
+
+ return ($address, $nskipped);
+}
+
+
+sub _next_sibling_test {
+ my $this = shift();
+ my($address) = @_;
+
+ my @components = split /:/, $address;
+ my $last = pop @components;
+ my $maybe = join(":", @components, $last+1);
+ return $maybe if $this->{tree}->select($maybe);
+ return undef;
+}
+
+
=head1 SEE ALSO
ZOOM::IRSpy::Record,