X-Git-Url: http://git.indexdata.com/?p=irspy-moved-to-github.git;a=blobdiff_plain;f=lib%2FZOOM%2FIRSpy%2FUtils.pm;h=eeffccced7a12a29bb0fc2e363d1f393c525bf8e;hp=ad98b155768f642387f8cfd8f23b5b12bec94e56;hb=70f35e9d28e4b25f763baee0602e2eac25264ebf;hpb=39556c02c62aade0988849bd82f5f16d16919c1f diff --git a/lib/ZOOM/IRSpy/Utils.pm b/lib/ZOOM/IRSpy/Utils.pm index ad98b15..eeffccc 100644 --- a/lib/ZOOM/IRSpy/Utils.pm +++ b/lib/ZOOM/IRSpy/Utils.pm @@ -1,4 +1,3 @@ -# $Id: Utils.pm,v 1.21 2006-12-18 15:34:54 mike Exp $ package ZOOM::IRSpy::Utils; @@ -6,22 +5,99 @@ use 5.008; use strict; use warnings; +use Scalar::Util; + use Exporter 'import'; -our @EXPORT_OK = qw(isodate +our @EXPORT_OK = qw(utf8param + trimField + utf8paramTrim + isodate xml_encode cql_quote cql_target irspy_xpath_context + irspy_make_identifier + irspy_record2identifier + irspy_identifier2target modify_xml_document - bib1_access_point); + bib1_access_point + render_record + validate_record + calc_reliability_string + calc_reliability_stats); use XML::LibXML; use XML::LibXML::XPathContext; +use Encode; +use Encode qw(is_utf8); + our $IRSPY_NS = 'http://indexdata.com/irspy/1.0'; +# Under Apache 2/mod_perl 2, the ubiquitous $r is no longer and +# Apache::Request object, nor even an Apache2::Request, but an +# Apache2::RequestReq ... which, astonishingly, doesn't have the +# param() method. So if we're given one of these things, we need to +# make an Apache::Request out of, which at least isn't too hard. +# However *sigh* this may not be a cheap operation, so we keep a cache +# of already-made Request objects. +# +my %_apache2request; +my %_paramsbyrequest; # Used for Apache2 only +sub utf8param { + my($r, $key, $value) = @_; + + if ($r->isa('Apache2::RequestRec')) { + # Running under Apache2 + if (defined $_apache2request{$r}) { + #warn "using existing Apache2::RequestReq for '$r'"; + $r = $_apache2request{$r}; + } else { + require Apache2::Request; + #warn "making new Apache2::RequestReq for '$r'"; + $r = $_apache2request{$r} = new Apache2::Request($r); + } + } + + if (!defined $key) { + return map { decode_utf8($_) } $r->param(); + } + + my $raw = undef; + $raw = $_paramsbyrequest{$r}->{$key} if $r->isa('Apache2::Request'); + $raw = $r->param($key) if !defined $raw; + + if (defined $value) { + # Argh! Simply writing through to the underlying method + # param() won't work in Apache2, where param() is readonly. + # So we have to keep a hash of additional values, which we + # consult (above) before the actual parameters. Ouch ouch. + if ($r->isa('Apache2::Request')) { + $_paramsbyrequest{$r}->{$key} = encode_utf8($value); + } else { + $r->param($key, encode_utf8($value)); + } + } + + return undef if !defined $raw; + my $cooked = decode_utf8($raw); + warn "converted '$raw' to '", $cooked, "'\n" if $cooked ne $raw; + return $cooked; +} # Utility functions follow, exported for use of web UI +sub utf8param_apache1 { + my($r, $key, $value) = @_; + die "utf8param() called with value '$value'" if defined $value; + + my $raw = $r->param($key); + return undef if !defined $raw; + my $cooked = decode_utf8($raw); + warn "converted '$raw' to '", $cooked, "'\n" if $cooked ne $raw; + return $cooked; +} + + sub isodate { my($time) = @_; @@ -30,6 +106,26 @@ sub isodate { $year+1900, $mon+1, $mday, $hour, $min, $sec); } +# strips whitespaces at start and ends of a field +sub trimField { + my $field = shift; + + $field =~ s/^\s+//; + $field =~ s/\s+$//; + + return $field; +} + +# utf8param() with trim +sub utf8paramTrim { + my $result = utf8param(@_); + + if (defined $result) { + $result = trimField($result); + } + + return $result; +} # I can't -- just can't, can't, can't -- believe that this function # isn't provided by one of the core XML modules. But the evidence all @@ -66,19 +162,25 @@ sub xml_encode { sub cql_quote { my($term) = @_; - $term =~ s/([""\\])/\\$1/g; - $term = qq["$term"] if $term =~ /\s/; + $term =~ s/([""\\*?])/\\$1/g; + $term = qq["$term"] if $term =~ /[\s""\/]/; return $term; } -# Makes a CQL query that finds a specified target +# Makes a CQL query that finds a specified target. Arguments may be +# either an ID alone, or a (host, port, db) triple. sub cql_target { - my($host, $port, $db) = @_; + my($protocol, $host, $port, $db) = @_; - return ("host=" . cql_quote($host) . " and " . - "port=" . cql_quote($port) . " and " . - "path=" . cql_quote($db)); + my $id; + if (defined $host) { + $id = irspy_make_identifier($protocol, $host, $port, $db); + } else { + $id = $protocol; + } + + return "rec.id=" . cql_quote($id); } @@ -105,10 +207,19 @@ sub irspy_namespace { sub irspy_xpath_context { my($record) = @_; - my $xml = ref $record ? $record->render() : $record; - my $parser = new XML::LibXML(); - my $doc = $parser->parse_string($xml); - my $root = $doc->getDocumentElement(); + if (ref $record && $record->isa("ZOOM::Record")) { + $record = $record->render(); + } + + my $root; + if (ref $record) { + $root = $record; + } else { + my $parser = new XML::LibXML(); + my $doc = $parser->parse_string($record); + $root = $doc->getDocumentElement(); + } + my $xc = XML::LibXML::XPathContext->new($root); foreach my $prefix (keys %_namespaces) { $xc->registerNs($prefix, $_namespaces{$prefix}); @@ -117,6 +228,81 @@ sub irspy_xpath_context { } +# Construct an opaque identifier from its components. Although it's +# trivial, this is needed in so many places that it really needs to be +# factored out. +# +# This is the converse of _parse_target_string() in IRSpy.pm, which +# should be renamed and moved into this package. +# +sub irspy_make_identifier { + my($protocol, $host, $port, $dbname) = @_; + + die "irspy_make_identifier(" . join(", ", map { "'$_'" } @_). + "): wrong number of arguments" if @_ != 4; + + die "irspy_make_identifier(): protocol undefined" if !defined $protocol; + die "irspy_make_identifier(): host undefined" if !defined $host; + die "irspy_make_identifier(): port undefined" if !defined $port; + die "irspy_make_identifier(): dbname undefined" if !defined $dbname; + + return "$protocol:$host:$port/$dbname"; +} + + +# Returns the opaque identifier of an IRSpy record based on the +# XPathContext'ed DOM object, as returned by irspy_xpath_context(). +# This is doing the same thing as irspy_make_identifier() but from a +# record rather than a set of parameters. +# +sub irspy_record2identifier { + my($xc) = @_; + + ### Must be kept the same as is used in ../../../zebra/*.xsl + return $xc->find("concat(e:serverInfo/\@protocol, ':', + e:serverInfo/e:host, ':', + e:serverInfo/e:port, '/', + e:serverInfo/e:database)"); +} + + +# Transforms an IRSpy opqaue identifier, as returned from +# irspy_make_identifier() or irspy_record2identifier(), into a YAZ +# target-string suitable for feeding to ZOOM. Before we introduced +# the protocol element at the start of the identifier string, this was +# a null transform; now we have to be a bit cleverer. +# +sub irspy_identifier2target { + my $res = _irspy_identifier2target(@_); + #carp "converted ID '@_' to target '$res'"; + return $res; +} + +sub _irspy_identifier2target { + my($id) = @_; + + confess "_irspy_identifier2target(): id is undefined" + if !defined $id; + + my($protocol, $target) = ($id =~ /(.*?):(.*)/); + if (uc($protocol) eq "Z39.50" || uc($protocol) eq "TCP") { + return "tcp:$target"; + } elsif (uc($protocol) eq "SRU") { + return "sru=get,http:$target"; + } elsif (uc($protocol) eq "SRW") { + return "sru=srw,http:$target"; + } + + warn "_irspy_identifier2target($id): unrecognised protocol '$protocol'"; + return $target; +} + + +# Modifies the XML document for which $xc is an XPath context by +# inserting or replacing the values specified in the hash %$data. The +# keys are fieldnames, which are looked up in the register +# $fieldsByKey to determine, among other things, what their XPath is. + sub modify_xml_document { my($xc, $fieldsByKey, $data) = @_; @@ -146,27 +332,28 @@ sub modify_xml_document { # we'll check whether the element is already # canonical, to determine whether our change is a # no-op. - my $old = "???"; + my $old = ""; my @children = $node->childNodes(); if (@children == 1) { my $child = $node->firstChild(); if (ref $child && ref $child eq "XML::LibXML::Text") { $old = $child->getData(); - next if $value eq $old; + #print STDERR "child='$child', old=", _renderchars($old), "\n" if $key eq "title"; } } + next if $value eq $old; $node->removeChildNodes(); my $child = new XML::LibXML::Text($value); $node->appendChild($child); push @changes, $ref; - #print "Elem $key: '$old' -> '$value' ($xpath)
\n"; + #print STDERR "Elem $key ($xpath): ", _renderchars($old), " -> '", _renderchars($value), "\n"; } else { warn "unexpected node type $node"; } } else { - next if !$value; # No need to create a new empty node + next if !defined $value; # No need to create a new empty node my($ppath, $selector) = $xpath =~ /(.*)\/(.*)/; dom_add_node($xc, $ppath, $selector, $value, @addAfter); #print "New $key ($xpath) = '$value'
\n"; @@ -178,6 +365,13 @@ sub modify_xml_document { } +sub _renderchars { + my($text) = @_; + + return "'" . $text . "'", " (", join(" ", map {ord($_)} split //, $text), "), is_utf8=" , is_utf8($text); +} + + sub dom_add_node { my($xc, $ppath, $selector, $value, @addAfter) = @_; @@ -289,7 +483,7 @@ sub inheritance_tree { # This function is made available in xslt using the register_function call sub xslt_strcmp { my ($arg1, $arg2) = @_; - return ($arg1->to_literal()) cmp ($arg2->to_literal()); + return "$arg1" cmp "$arg2"; } @@ -592,4 +786,81 @@ sub bib1_access_point { } +sub render_record { + my($rs, $which, $elementSetName) = @_; + + # There is a slight race condition here on the element-set name, + # but it shouldn't be a problem as this is (currently) only called + # from parts of the program that run single-threaded. + my $old = $rs->option(elementSetName => $elementSetName); + my $rec = $rs->record($which); + $rs->option(elementSetName => $old); + + return $rec->render(); +} + + +sub calc_reliability_string { + my($xc) = @_; + + my($nok, $nall, $percent) = calc_reliability_stats($xc); + return "[untested]" if $nall == 0; + return "$nok/$nall = " . $percent . "%"; +} + + +sub calc_reliability_stats { + my($xc) = @_; + + my @allpings = $xc->findnodes("i:status/i:probe"); + my $nall = @allpings; + return (0, 0, 0) if $nall == 0; + my @okpings = $xc->findnodes('i:status/i:probe[@ok = "1"]'); + my $nok = @okpings; + my $percent = int(100*$nok/$nall + 0.5); + return ($nok, $nall, $percent); +} + +# +# validate_record( record, ( "port" => 1, "database" => 1, "country" => 0, ... )) +# +sub validate_record { + my $rec = shift; + my %args = @_; + + my %required = map { $_ => 1 } qw/port host database/; + my %optional = map { $_ => 1 } qw/country type hosturl contact language/; + my %tests = ( %required, %args ); + + my $xc = irspy_xpath_context($rec); + + my $protocol = $xc->findnodes("e:serverInfo/\@protocol") || ""; + my $port = $xc->findnodes("e:serverInfo/e:port") || ""; + my $host = $xc->findnodes("e:serverInfo/e:host") || ""; + my $dbname = $xc->findnodes("e:serverInfo/e:database") || ""; + + my $id = irspy_make_identifier($protocol, $host, $port, $dbname); + + my @errors = $id; + + if ($tests{'port'}) { + push(@errors, 'This port number is not valid') if $port !~ /^\d+$/; + } + + if ($tests{'host'}) { + push(@errors, 'This host name is not valid') if $host !~ /^[0-9a-z]+[0-9a-z\.\-]*\.[0-9a-z]+$/i; + } + + if ($tests{'database'}) { + push(@errors, 'This database name is not valid') if $dbname !~ /^[\w_\-\.]+$/i; + } + + if ($tests{'hosturl'}) { + my $hosturl = $xc->findnodes("i:status/i:hostURL") || ""; + push(@errors, 'This hosturl name is not valid') if $hosturl !~ /^\w+$/i; + } + + return ( !$#errors, \@errors ); +} + 1;