2 package ZOOM::IRSpy::Stats;
9 use ZOOM::IRSpy::Utils qw(irspy_xpath_context);
13 ZOOM::IRSpy::Stats - statistics generated for IRSpy about its targets
17 $stats = new ZOOM::IRSpy::Stats($dbname);
22 Provides a simple API to obtaining statistics about targets registered
23 in IRSpy. This is done just by creating a Stats object. Once this
24 object is made, it can be crudely printed using the built-in debugging
25 C<print()> method, or the application can walk the structure to
32 $stats = new ZOOM::IRSpy::Stats($dbname, "dc.creator=wedel");
34 $stats = new ZOOM::IRSpy::Stats($dbname,
35 new ZOOM::Query::PQF('@attr 1=1003 wedel');
37 $spy = new ZOOM::Connection("target/string/for/irspy/database");
38 $stats = new ZOOM::IRSpy::Stats($spy, $query);
40 Creates a new C<ZOOM::IRSpy::Stats> object and populates it with
41 statistics for the targets in the nominated database. This process
42 involves analysing the nominated IRSpy database at some length, and
43 which therefore takes some time
45 Either one or two arguments are required:
49 =item $conn (mandatory)
51 An indication of the IRSpy database that statistics are required for.
52 This may be in the form of a C<ZOOM::Connection> object or a
53 database-name string such as C<localhost:8018/IR-Explain---1>.
55 =item $query (optional)
57 The query with which to select a subset of the database to be
58 analysed. This may be in the form of a C<ZOOM::Query> object (using
59 any of the supported subclasses) or a CQL string. If this is omitted,
60 then all records in the database are included in the generated
69 my($conn, $query) = @_;
71 $query ||= "cql.allRecords=1",
72 $conn = new ZOOM::Connection($conn) if !ref $conn;
73 $query = new ZOOM::Query::CQL($query) if !ref $query;
75 my $oldSyntax = $conn->option("preferredRecordSyntax");
76 my $oldESN = $conn->option("elementSetName");
77 my $oldPC = $conn->option("presentChunk");
78 $conn->option(preferredRecordSyntax => "xml");
79 $conn->option(elementSetName => "zeerex");
80 # $conn->option(presentChunk => 10);
82 my $rs = $conn->search($query);
86 host => $conn->option("host"),
93 $this->_gather_stats();
94 $conn->option(preferredRecordSyntax => $oldSyntax);
95 $conn->option(elementSetName => $oldESN);
96 $conn->option(presentChunk => $oldPC);
105 foreach my $i (0 .. $this->{n}-1) {
106 my $rec = $this->{rs}->record($i);
107 my $xc = irspy_xpath_context($rec);
109 # The ten most commonly supported Bib-1 Use attributes
110 foreach my $node ($xc->findnodes('e:indexInfo/e:index[@search="true"]/e:map/e:attr[@type=1 and @set="bib-1"]')) {
111 $this->{bib1AccessPoints}->{$node->findvalue(".")}++;
114 # Record syntax support by database
115 foreach my $node ($xc->findnodes('e:recordInfo/e:recordSyntax/@name')) {
116 $this->{recordSyntaxes}->{$node->findvalue(".")}++;
120 foreach my $node ($xc->findnodes('i:status/i:explain[@ok="1"]/@category')) {
121 $this->{explain}->{$node->findvalue(".")}++;
124 # Z39.50 Protocol Services Support
125 foreach my $node ($xc->findnodes('e:configInfo/e:supports')) {
126 my $supports = $node->findvalue('@type');
127 if ($node->findvalue(".") && $supports =~ s/^z3950_//) {
128 $this->{z3950_init_opt}->{$supports}++;
132 # Z39.50 Server Atlas
133 ### TODO -- awkward, should be considered an enhancement
136 my $host = $xc->findvalue('e:serverInfo/e:host');
138 $this->{domains}->{$host}++;
141 foreach my $node ($xc->findnodes('i:status/i:serverImplementationName/@value')) {
142 $this->{implementation}->{$node->findvalue(".")}++;
143 last; # This is because many of the records are still
144 # polluted with multiple implementationName elements
145 # from back then XSLT stylesheet that generated
146 # ZeeRex records was wrong.
156 Prints an ugly but human-readable summary of the statistics on
164 print "database = '", $this->{conn}->option("host"), "'\n";
165 print "query = '", $this->{query}, "'\n";
166 print "result set = '", $this->{rs}, "'\n";
167 print "count = '", $this->{n}, "'\n";
170 print "\nTOP 10 BIB-1 ATTRIBUTES\n";
171 $hr = $this->{bib1AccessPoints};
172 foreach my $key ((sort { $hr->{$b} <=> $hr->{$a}
173 || $a <=> $b } keys %$hr)[0..9]) {
174 print sprintf("%6d%20s%5d (%d%%)\n",
175 $key, "", $hr->{$key}, 100*$hr->{$key}/$this->{n});
178 print "\nRECORD SYNTAXES\n";
179 $hr = $this->{recordSyntaxes};
180 foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
181 || $a cmp $b } keys %$hr) {
182 print sprintf("%-26s%5d (%d%%)\n",
183 $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
186 print "\nEXPLAIN SUPPORT\n";
187 $hr = $this->{explain};
188 foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
189 || $a cmp $b } keys %$hr) {
190 print sprintf("%-26s%5d (%d%%)\n",
191 $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
194 print "\nZ39.50 PROTOCOL SERVICES SUPPORT\n";
195 $hr = $this->{z3950_init_opt};
196 foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
197 || $a cmp $b } keys %$hr) {
198 print sprintf("%-26s%5d (%d%%)\n",
199 $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
202 print "\nTOP-LEVEL DOMAINS\n";
203 $hr = $this->{domains};
204 foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
205 || $a cmp $b } keys %$hr) {
206 print sprintf("%-26s%5d (%d%%)\n",
207 $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
210 print "\nIMPLEMENTATIONS\n";
211 $hr = $this->{implementation};
212 foreach my $key (sort { $hr->{$b} <=> $hr->{$a}
213 || $a cmp $b } keys %$hr) {
214 print sprintf("%-26s%5d (%d%%)\n",
215 $key, $hr->{$key}, 100*$hr->{$key}/$this->{n});
226 Mike Taylor, E<lt>mike@indexdata.comE<gt>
228 =head1 COPYRIGHT AND LICENSE
230 Copyright (C) 2006 by Index Data ApS.
232 This library is free software; you can redistribute it and/or modify
233 it under the same terms as Perl itself, either Perl version 5.8.7 or,
234 at your option, any later version of Perl 5 you may have available.