From: Mike Taylor Date: Fri, 15 Dec 2006 10:36:36 +0000 (+0000) Subject: Generate stats for BIB-1 attributes. X-Git-Tag: CPAN-v1.02~54^2~615 X-Git-Url: http://git.indexdata.com/?p=irspy-moved-to-github.git;a=commitdiff_plain;h=2a16f7bee31a2074eb2ee349e8eac37831af9647 Generate stats for BIB-1 attributes. Add print() method. --- diff --git a/lib/ZOOM/IRSpy/Stats.pm b/lib/ZOOM/IRSpy/Stats.pm index 631b516..7eccf26 100644 --- a/lib/ZOOM/IRSpy/Stats.pm +++ b/lib/ZOOM/IRSpy/Stats.pm @@ -1,10 +1,11 @@ -# $Id: Stats.pm,v 1.1 2006-12-14 17:34:57 mike Exp $ +# $Id: Stats.pm,v 1.2 2006-12-15 10:36:36 mike Exp $ package ZOOM::IRSpy::Stats; use 5.008; use strict; use warnings; +use ZOOM::IRSpy::Utils qw(irspy_xpath_context); =head1 NAME @@ -13,14 +14,15 @@ ZOOM::IRSpy::Stats - statistics generated for IRSpy about its targets =head1 SYNOPSIS $stats = new ZOOM::IRSpy::Stats($dbname); - use Data::Dumper; print Dumper($stats); + $stats->print(); =head1 DESCRIPTION Provides a simple API to obtaining statistics about targets registered in IRSpy. This is done just by creating a Stats object. Once this -object is made, it can be crudely dumped, or the application can walk -the structure to produce nice output. +object is made, it can be crudely printed using the built-in debugging +C method, or the application can walk the structure to +produce nice output. =head1 METHODS @@ -31,7 +33,7 @@ the structure to produce nice output. $stats = new ZOOM::IRSpy::Stats($dbname, new ZOOM::Query::PQF('@attr 1=1003 wedel'); # Or: - $spy = new ZOOM::IRSpy("target/string/for/irspy/database"); + $spy = new ZOOM::Connection("target/string/for/irspy/database"); $stats = new ZOOM::IRSpy::Stats($spy, $query); Creates a new C object and populates it with @@ -43,11 +45,11 @@ Either one or two arguments are required: =over 4 -=item $irspy (mandatory) +=item $conn (mandatory) An indication of the IRSpy database that statistics are required for. -This may be in the form of a C object or a database-name -string such as C. +This may be in the form of a C object or a +database-name string such as C. =item $query (optional) @@ -63,12 +65,76 @@ statistics. sub new { my $class = shift(); - my($irspy, $query) = @_; - - return bless { - irspy => $irspy, - query => $query || "cql.allRecords=1", + my($conn, $query) = @_; + + $query ||= "cql.allRecords=1", + $conn = new ZOOM::Connection($conn) if !ref $conn; + $query = new ZOOM::Query::CQL($query) if !ref $query; + + my $oldSyntax = $conn->option("preferredRecordSyntax"); + my $oldESN = $conn->option("elementSetName"); + my $oldPC = $conn->option("presentChunk"); + $conn->option(preferredRecordSyntax => "xml"); + $conn->option(elementSetName => "zeerex"); + $conn->option(presentChunk => 10); + + my $rs = $conn->search($query); + my $n = $rs->size(); + + my $this = bless { + conn => $conn, + query => $query, + rs => $rs, + n => $n, }, $class; + + $this->_gather_stats(); + $conn->option(preferredRecordSyntax => $oldSyntax); + $conn->option(elementSetName => $oldESN); + $conn->option(presentChunk => $oldPC); + + return $this; +} + + +sub _gather_stats { + my $this = shift(); + + foreach my $i (0 .. $this->{n}-1) { + my $rec = $this->{rs}->record($i); + my $xc = irspy_xpath_context($rec); + + foreach my $node ($xc->findnodes('e:indexInfo/e:index[@search="true"]/e:map/e:attr[@type=1 and @set="bib-1"]')) { + $this->{bib1AccessPoints}->{$node->findvalue(".")}++; + } + } +} + + +=head2 print() + + $stats->print(); + +Prints an ugly but human-readable summary of the statistics on +standard output. + +=cut + +sub print { + my $this = shift(); + + print "database = '", $this->{conn}->option("host"), "'\n"; + print "query = '", $this->{query}, "'\n"; + print "result set = '", $this->{rs}, "'\n"; + print "count = '", $this->{n}, "'\n"; + print "\n"; + print "BIB-1 ATTRIBUTES\n"; + my $ap = $this->{bib1AccessPoints}; + foreach my $attr (sort { $ap->{$b} <=> $ap->{$a} + || $a <=> $b } keys %$ap) { + print sprintf("%6d%20s%d (%d%%)\n", + $attr, "", $ap->{$attr}, 100*$ap->{$attr}/$this->{n}); + } }