X-Git-Url: http://git.indexdata.com/?p=irspy-moved-to-github.git;a=blobdiff_plain;f=lib%2FZOOM%2FIRSpy%2FStats.pm;h=af667dcede609315b337be1a1281a56c938dd206;hp=7eccf26156b6e25a68f58ba35a25b5ac5f0919f8;hb=3f62972e9c6f2c307e1a32c18de36860c47cffa2;hpb=701f60e56bcd457112098d7d79ca701b590b28d8 diff --git a/lib/ZOOM/IRSpy/Stats.pm b/lib/ZOOM/IRSpy/Stats.pm index 7eccf26..af667dc 100644 --- a/lib/ZOOM/IRSpy/Stats.pm +++ b/lib/ZOOM/IRSpy/Stats.pm @@ -1,4 +1,4 @@ -# $Id: Stats.pm,v 1.2 2006-12-15 10:36:36 mike Exp $ +# $Id: Stats.pm,v 1.7 2007-10-31 16:42:13 mike Exp $ package ZOOM::IRSpy::Stats; @@ -49,7 +49,7 @@ Either one or two arguments are required: An indication of the IRSpy database that statistics are required for. This may be in the form of a C object or a -database-name string such as C. +database-name string such as C. =item $query (optional) @@ -76,12 +76,13 @@ sub new { my $oldPC = $conn->option("presentChunk"); $conn->option(preferredRecordSyntax => "xml"); $conn->option(elementSetName => "zeerex"); - $conn->option(presentChunk => 10); +# $conn->option(presentChunk => 10); my $rs = $conn->search($query); my $n = $rs->size(); my $this = bless { + host => $conn->option("host"), conn => $conn, query => $query, rs => $rs, @@ -104,9 +105,45 @@ sub _gather_stats { my $rec = $this->{rs}->record($i); my $xc = irspy_xpath_context($rec); + # The ten most commonly supported Bib-1 Use attributes foreach my $node ($xc->findnodes('e:indexInfo/e:index[@search="true"]/e:map/e:attr[@type=1 and @set="bib-1"]')) { $this->{bib1AccessPoints}->{$node->findvalue(".")}++; } + + # Record syntax support by database + foreach my $node ($xc->findnodes('e:recordInfo/e:recordSyntax/@name')) { + $this->{recordSyntaxes}->{$node->findvalue(".")}++; + } + + # Explain support + foreach my $node ($xc->findnodes('i:status/i:explain[@ok="1"]/@category')) { + $this->{explain}->{$node->findvalue(".")}++; + } + + # Z39.50 Protocol Services Support + foreach my $node ($xc->findnodes('e:configInfo/e:supports')) { + my $supports = $node->findvalue('@type'); + if ($node->findvalue(".") && $supports =~ s/^z3950_//) { + $this->{z3950_init_opt}->{$supports}++; + } + } + + # Z39.50 Server Atlas + ### TODO -- awkward, should be considered an enhancement + + # Top Domains + my $host = $xc->findvalue('e:serverInfo/e:host'); + $host =~ s/.*\.//; + $this->{domains}->{$host}++; + + # Implementation + foreach my $node ($xc->findnodes('i:status/i:serverImplementationName/@value')) { + $this->{implementation}->{$node->findvalue(".")}++; + last; # This is because many of the records are still + # polluted with multiple implementationName elements + # from back then XSLT stylesheet that generated + # ZeeRex records was wrong. + } } } @@ -127,13 +164,54 @@ sub print { print "query = '", $this->{query}, "'\n"; print "result set = '", $this->{rs}, "'\n"; print "count = '", $this->{n}, "'\n"; - print "\n"; - print "BIB-1 ATTRIBUTES\n"; - my $ap = $this->{bib1AccessPoints}; - foreach my $attr (sort { $ap->{$b} <=> $ap->{$a} - || $a <=> $b } keys %$ap) { - print sprintf("%6d%20s%d (%d%%)\n", - $attr, "", $ap->{$attr}, 100*$ap->{$attr}/$this->{n}); + my $hr; + + print "\nTOP 10 BIB-1 ATTRIBUTES\n"; + $hr = $this->{bib1AccessPoints}; + foreach my $key ((sort { $hr->{$b} <=> $hr->{$a} + || $a <=> $b } keys %$hr)[0..9]) { + print sprintf("%6d%20s%5d (%d%%)\n", + $key, "", $hr->{$key}, 100*$hr->{$key}/$this->{n}); + } + + print "\nRECORD SYNTAXES\n"; + $hr = $this->{recordSyntaxes}; + foreach my $key (sort { $hr->{$b} <=> $hr->{$a} + || $a cmp $b } keys %$hr) { + print sprintf("%-26s%5d (%d%%)\n", + $key, $hr->{$key}, 100*$hr->{$key}/$this->{n}); + } + + print "\nEXPLAIN SUPPORT\n"; + $hr = $this->{explain}; + foreach my $key (sort { $hr->{$b} <=> $hr->{$a} + || $a cmp $b } keys %$hr) { + print sprintf("%-26s%5d (%d%%)\n", + $key, $hr->{$key}, 100*$hr->{$key}/$this->{n}); + } + + print "\nZ39.50 PROTOCOL SERVICES SUPPORT\n"; + $hr = $this->{z3950_init_opt}; + foreach my $key (sort { $hr->{$b} <=> $hr->{$a} + || $a cmp $b } keys %$hr) { + print sprintf("%-26s%5d (%d%%)\n", + $key, $hr->{$key}, 100*$hr->{$key}/$this->{n}); + } + + print "\nTOP-LEVEL DOMAINS\n"; + $hr = $this->{domains}; + foreach my $key (sort { $hr->{$b} <=> $hr->{$a} + || $a cmp $b } keys %$hr) { + print sprintf("%-26s%5d (%d%%)\n", + $key, $hr->{$key}, 100*$hr->{$key}/$this->{n}); + } + + print "\nIMPLEMENTATIONS\n"; + $hr = $this->{implementation}; + foreach my $key (sort { $hr->{$b} <=> $hr->{$a} + || $a cmp $b } keys %$hr) { + print sprintf("%-26s%5d (%d%%)\n", + $key, $hr->{$key}, 100*$hr->{$key}/$this->{n}); } }