X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;ds=sidebyside;f=lib%2FZOOM%2FIRSpy%2FUtils.pm;h=856e60250f41ece095a591499d892a4170763f6e;hb=0cf41562dd3be395b42934a4a1a99404ce2a4e68;hp=c31d188ef6f757ad7f8e88501cdb0a0eacd21e6b;hpb=b2d2832eaa18ae8cfef70651c05bdc57069e4dde;p=irspy-moved-to-github.git

diff --git a/lib/ZOOM/IRSpy/Utils.pm b/lib/ZOOM/IRSpy/Utils.pm
index c31d188..856e602 100644
--- a/lib/ZOOM/IRSpy/Utils.pm
+++ b/lib/ZOOM/IRSpy/Utils.pm
@@ -1,4 +1,3 @@
-# $Id: Utils.pm,v 1.22 2007-03-01 13:51:18 mike Exp $
 
 package ZOOM::IRSpy::Utils;
 
@@ -6,21 +5,99 @@ use 5.008;
 use strict;
 use warnings;
 
+use Scalar::Util;
+
 use Exporter 'import';
-our @EXPORT_OK = qw(isodate
+our @EXPORT_OK = qw(utf8param
+		    trimField
+		    utf8paramTrim
+		    isodate
 		    xml_encode 
 		    cql_quote
+		    cql_target
 		    irspy_xpath_context
+		    irspy_make_identifier
+		    irspy_record2identifier
+		    irspy_identifier2target
 		    modify_xml_document
-		    bib1_access_point);
+		    bib1_access_point
+		    render_record
+		    validate_record
+		    calc_reliability_string
+		    calc_reliability_stats);
 
 use XML::LibXML;
 use XML::LibXML::XPathContext;
+use Encode;
+use Encode qw(is_utf8);
+
 
 our $IRSPY_NS = 'http://indexdata.com/irspy/1.0';
 
+# Under Apache 2/mod_perl 2, the ubiquitous $r is no longer an
+# Apache::Request object, nor even an Apache2::Request, but an
+# Apache2::RequestRec ... which, astonishingly, doesn't have the
+# param() method.  So if we're given one of these things, we need to
+# make an Apache2::Request out of it, which at least isn't too hard.
+# However *sigh* this may not be a cheap operation, so we keep a cache
+# of already-made Request objects.
+#
+# utf8param($r, $key [, $value]): UTF-8-aware wrapper around $r->param().
+# With no $key, returns the decoded list from $r->param().  Otherwise
+# returns the decoded value of $key as it was before any $value was
+# stored (undef if absent); $value is UTF-8-encoded before storing.
+# NOTE(review): both hashes are keyed by the stringified request and are
+# never emptied in this block -- confirm their lifetime under mod_perl.
+my %_apache2request;
+my %_paramsbyrequest;           # Used for Apache2 only
+sub utf8param {
+    my($r, $key, $value) = @_;
+    if ($r->isa('Apache2::RequestRec')) {
+        # Running under Apache2
+        if (defined $_apache2request{$r}) {
+            #warn "using existing Apache2::Request for '$r'";
+            $r = $_apache2request{$r};
+        } else {
+            require Apache2::Request;
+            #warn "making new Apache2::Request for '$r'";
+            $r = $_apache2request{$r} = Apache2::Request->new($r);
+        }
+    }
+    return map { decode_utf8($_) } $r->param() if !defined $key;
+
+    my $raw = undef;
+    $raw = $_paramsbyrequest{$r}->{$key} if $r->isa('Apache2::Request');
+    $raw = $r->param($key) if !defined $raw;
+    if (defined $value) {
+        # Argh!  Simply writing through to the underlying method
+        # param() won't work in Apache2, where param() is readonly.
+        # So we have to keep a hash of additional values, which we
+        # consult (above) before the actual parameters.  Ouch ouch.
+        if ($r->isa('Apache2::Request')) {
+            $_paramsbyrequest{$r}->{$key} = encode_utf8($value);
+        } else {
+            $r->param($key, encode_utf8($value));
+        }
+    }
+    return undef if !defined $raw;
+    my $cooked = decode_utf8($raw);
+    warn "converted '$raw' to '", $cooked, "'\n" if $cooked ne $raw;
+    return $cooked;
+}
 
 # Utility functions follow, exported for use of web UI
+sub utf8param_apache1 {   # read-only variant; not listed in @EXPORT_OK
+    my($r, $key, $value) = @_;
+    die "utf8param() called with value '$value'" if defined $value;
+    # Setting is not supported here: any defined $value is fatal (above).
+    my $raw = $r->param($key);
+    return undef if !defined $raw;      # parameter absent
+    my $cooked = decode_utf8($raw);
+    warn "converted '$raw' to '", $cooked, "'\n" if $cooked ne $raw;
+    return $cooked;                     # the UTF-8-decoded value
+}
+
+
 sub isodate {
     my($time) = @_;
 
@@ -29,6 +106,26 @@ sub isodate {
 		   $year+1900, $mon+1, $mday, $hour, $min, $sec);
 }
 
+# Returns $field with leading and trailing whitespace removed; an
+# undefined $field is returned unchanged (and without warnings).
+sub trimField {
+    my $field  = shift;
+    return $field if !defined $field;
+    $field =~ s/^\s+//;
+    $field =~ s/\s+$//;
+    return $field;
+}
+
+# Calls utf8param() with the same arguments (in scalar context) and
+# returns its result with surrounding whitespace trimmed via trimField().
+sub utf8paramTrim {
+    my $result = utf8param(@_);
+    # An undefined result is passed through without trimming.
+    if (defined $result) {
+	$result = trimField($result);	
+    }
+    return $result;
+}
 
 # I can't -- just can't, can't, can't -- believe that this function
 # isn't provided by one of the core XML modules.  But the evidence all
@@ -65,12 +162,28 @@ sub xml_encode {
 sub cql_quote {
     my($term) = @_;
 
-    $term =~ s/([""\\])/\\$1/g;
-    $term = qq["$term"] if $term =~ /\s/;
+    $term =~ s/([""\\*?])/\\$1/g;
+    $term = qq["$term"] if $term =~ /[\s""\/]/;
     return $term;
 }
 
 
+# Makes a CQL query that finds a specified target.  Arguments may be
+# either an ID alone, or a (protocol, host, port, db) quadruple.
+# Returns the string "rec.id=" followed by the cql_quote()d identifier.
+sub cql_target {
+    my($protocol, $host, $port, $db) = @_;
+
+    my $id;
+    if (defined $host) {
+	$id = irspy_make_identifier($protocol, $host, $port, $db);
+    } else {
+	$id = $protocol;	# single-argument form: the first argument is the ID
+    }
+    return "rec.id=" . cql_quote($id);
+}
+
+
 # PRIVATE to irspy_namespace() and irspy_xpath_context()
 my %_namespaces = (
 		   e => 'http://explain.z3950.org/dtd/2.0/',
@@ -94,10 +207,19 @@ sub irspy_namespace {
 sub irspy_xpath_context {
     my($record) = @_;
 
-    my $xml = ref $record ? $record->render() : $record;
-    my $parser = new XML::LibXML();
-    my $doc = $parser->parse_string($xml);
-    my $root = $doc->getDocumentElement();
+    if (ref $record && $record->isa("ZOOM::Record")) {
+	$record = $record->render();
+    }
+
+    my $root;
+    if (ref $record) {
+	$root = $record;
+    } else {
+	my $parser = new XML::LibXML();
+	my $doc = $parser->parse_string($record);
+	$root = $doc->getDocumentElement();
+    }
+
     my $xc = XML::LibXML::XPathContext->new($root);
     foreach my $prefix (keys %_namespaces) {
 	$xc->registerNs($prefix, $_namespaces{$prefix});
@@ -106,6 +228,81 @@ sub irspy_xpath_context {
 }
 
 
+# Construct an opaque identifier from its components.  Although it's
+# trivial, this is needed in so many places that it really needs to be
+# factored out.  Dies unless exactly four defined arguments are passed;
+# returns the string "$protocol:$host:$port/$dbname".
+#
+# This is the converse of _parse_target_string() in IRSpy.pm, which
+# should be renamed and moved into this package.
+#
+sub irspy_make_identifier {
+    my($protocol, $host, $port, $dbname) = @_;
+
+    die "irspy_make_identifier(" . join(", ", map { "'$_'" } @_).
+	"): wrong number of arguments" if @_ != 4;
+    # Each component must be defined, though an empty string is accepted.
+    die "irspy_make_identifier(): protocol undefined" if !defined $protocol;
+    die "irspy_make_identifier(): host undefined" if !defined $host;
+    die "irspy_make_identifier(): port undefined" if !defined $port;
+    die "irspy_make_identifier(): dbname undefined" if !defined $dbname;
+    return "$protocol:$host:$port/$dbname";
+}
+
+
+# Returns the opaque identifier of an IRSpy record based on the
+# XPathContext'ed DOM object, as returned by irspy_xpath_context().
+# This is doing the same thing as irspy_make_identifier() but from a
+# record rather than a set of parameters: a single XPath concat()
+# joins protocol ':' host ':' port '/' database from e:serverInfo.
+sub irspy_record2identifier {
+    my($xc) = @_;
+
+    ### Must be kept the same as is used in ../../../zebra/*.xsl
+    return $xc->find("concat(e:serverInfo/\@protocol, ':',
+			     e:serverInfo/e:host, ':',
+			     e:serverInfo/e:port, '/',
+			     e:serverInfo/e:database)");
+}
+
+
+# Transforms an IRSpy opaque identifier, as returned from
+# irspy_make_identifier() or irspy_record2identifier(), into a YAZ
+# target-string suitable for feeding to ZOOM.  Before we introduced
+# the protocol element at the start of the identifier string, this was
+# a null transform; now we have to be a bit cleverer.
+# Thin wrapper: all the work is done by _irspy_identifier2target().
+sub irspy_identifier2target {
+    my $res = _irspy_identifier2target(@_);
+    #carp "converted ID '@_' to target '$res'";
+    return $res;
+}
+
+# Does the work of irspy_identifier2target(): splits $id at its first
+# colon into protocol and remainder, and returns the ZOOM target string
+# for that protocol (matched case-insensitively).  An ID with no colon
+# has no protocol part, so it is returned unchanged after a warning.
+# NOTE(review): confess() is Carp's; no "use Carp" is visible here -- confirm.
+sub _irspy_identifier2target {
+    my($id) = @_;
+    confess "_irspy_identifier2target(): id is undefined"
+	if !defined $id;
+
+    my($protocol, $target) = $id =~ /(.*?):(.*)/ ? ($1, $2) : ("", $id);
+    my $ucprotocol = uc($protocol);
+    return "tcp:$target" if $ucprotocol eq "Z39.50" || $ucprotocol eq "TCP";
+    return "sru=get,http:$target" if $ucprotocol eq "SRU";
+    return "sru=srw,http:$target" if $ucprotocol eq "SRW";
+    warn "_irspy_identifier2target($id): unrecognised protocol '$protocol'";
+    return $target;
+}
+
+
+# Modifies the XML document for which $xc is an XPath context by
+# inserting or replacing the values specified in the hash %$data.  The
+# keys are fieldnames, which are looked up in the register
+# $fieldsByKey to determine, among other things, what their XPath is.
+
 sub modify_xml_document {
     my($xc, $fieldsByKey, $data) = @_;
 
@@ -135,27 +332,28 @@ sub modify_xml_document {
 		# we'll check whether the element is already
 		# canonical, to determine whether our change is a
 		# no-op.
-		my $old = "???";
+		my $old = "";
 		my @children = $node->childNodes();
 		if (@children == 1) {
 		    my $child = $node->firstChild();
 		    if (ref $child && ref $child eq "XML::LibXML::Text") {
 			$old = $child->getData();
-			next if $value eq $old;
+			#print STDERR "child='$child', old=", _renderchars($old), "\n" if $key eq "title";
 		    }
 		}
+		next if $value eq $old;
 
 		$node->removeChildNodes();
 		my $child = new XML::LibXML::Text($value);
 		$node->appendChild($child);
 		push @changes, $ref;
-		#print "Elem $key: '$old' -> '$value' ($xpath)<br/>\n";
+		#print STDERR "Elem $key ($xpath): ", _renderchars($old), " -> '", _renderchars($value), "\n";
 	    } else {
 		warn "unexpected node type $node";
 	    }
 
 	} else {
-	    next if !$value; # No need to create a new empty node
+	    next if !defined $value; # No need to create a new empty node
 	    my($ppath, $selector) = $xpath =~ /(.*)\/(.*)/;
 	    dom_add_node($xc, $ppath, $selector, $value, @addAfter);
 	    #print "New $key ($xpath) = '$value'<br/>\n";
@@ -167,6 +365,13 @@ sub modify_xml_document {
 }
 
 
+# Debugging aid: ONE string showing $text, its ord() values and is_utf8 flag.
+sub _renderchars {
+    my($text) = @_;
+    return "'" . $text . "' (" . join(" ", map {ord($_)} split //, $text) . "), is_utf8=" . is_utf8($text);
+}
+
+
 sub dom_add_node {
     my($xc, $ppath, $selector, $value, @addAfter) = @_;
 
@@ -278,7 +483,7 @@ sub inheritance_tree {
 # This function is made available in xslt using the register_function call
 sub xslt_strcmp {
     my ($arg1, $arg2) = @_;
-    return ($arg1->to_literal()) cmp ($arg2->to_literal());
+    return "$arg1" cmp "$arg2";
 }
 
 
@@ -581,4 +786,90 @@ sub bib1_access_point {
 }
 
 
+# Renders record $which of $rs with the element-set name temporarily set
+# to $elementSetName; the old name is restored even if record() throws.
+# (The swap is still racy, but callers are currently single-threaded.)
+sub render_record {
+    my($rs, $which, $elementSetName) = @_;
+    my $old = $rs->option(elementSetName => $elementSetName);
+    my($rec, $err);
+    eval { $rec = $rs->record($which); 1 } or $err = $@ || "record() failed";
+    $rs->option(elementSetName => $old);
+    die $err if defined $err;
+    return $rec->render();
+}
+
+
+sub calc_reliability_string {	# text summary of calc_reliability_stats($xc)
+    my($xc) = @_;
+    # $nok of $nall probes were ok; $percent arrives already rounded.
+    my($nok, $nall, $percent) = calc_reliability_stats($xc);
+    return "[untested]" if $nall == 0;	# no probe nodes were found
+    return "$nok/$nall = " . $percent . "%";
+}
+
+
+sub calc_reliability_stats {	# returns (ok count, total count, percent ok)
+    my($xc) = @_;
+    # Counts i:status/i:probe nodes via $xc; (0, 0, 0) when there are none.
+    my @allpings = $xc->findnodes("i:status/i:probe");
+    my $nall = @allpings;
+    return (0, 0, 0) if $nall == 0;
+    my @okpings = $xc->findnodes('i:status/i:probe[@ok = "1"]');
+    my $nok = @okpings;
+    my $percent = int(100*$nok/$nall + 0.5);	# round to the nearest integer
+    return ($nok, $nall, $percent);
+}
+
+#
+# validate_record( record, ( "port" => 1, "database" => 1, "country" => 0, ... ))
+#
+# Checks selected fields of an IRSpy record.  The port, host, database
+# and protocol tests run by default; %args can switch tests on or off.
+# Returns (ok, \@errors): $errors[0] is always the record's identifier
+# and any further elements are error messages, so ok means "just the ID".
+#
+sub validate_record {
+    my $rec = shift;
+    my %args = @_;
+
+    my %required = map { $_ => 1 } qw/port host database protocol/;
+    my %tests = ( %required, %args );
+
+    my $xc = irspy_xpath_context($rec);
+    my $protocol = $xc->findnodes("e:serverInfo/\@protocol") || "";
+    my $port = $xc->findnodes("e:serverInfo/e:port") || "";
+    my $host = $xc->findnodes("e:serverInfo/e:host") || "";
+    my $dbname = $xc->findnodes("e:serverInfo/e:database") || "";
+    my $id = irspy_make_identifier($protocol, $host, $port, $dbname);
+
+    if ($protocol =~ /\s+$/ || $dbname =~ /\s+$/) {
+	warn "xxx: $protocol:$host:$port:$dbname: whitespaces\n";
+    }
+
+    my @errors = $id;
+    if ($tests{'protocol'}) {
+	# Case-insensitive, like the uc() comparisons in _irspy_identifier2target().
+	push(@errors, 'protocol number is not valid') if $protocol !~ /^(z39\.50|sru|srw|tcp)$/i;
+    }
+    if ($tests{'port'}) {
+	push(@errors, 'port number is not valid') if $port !~ /^\d+$/;
+    }
+    if ($tests{'host'}) {
+	push(@errors, 'host name is not valid') if $host !~ /^[0-9a-z]+[0-9a-z\.\-]*\.[0-9a-z]+$/i;
+    }
+    if ($tests{'database'}) {
+	push(@errors, 'database name is not valid') if $dbname =~ m,/,i;
+	push(@errors, 'database has trailing spaces') if $dbname ne trimField($dbname);
+    }
+    if ($tests{'hosturl'}) {
+	# NOTE(review): \w+ cannot match a URL containing ':' or '/' -- confirm intent.
+        my $hosturl = $xc->findnodes("i:status/i:hostURL") || "";
+	push(@errors, 'This hosturl name is not valid') if $hosturl !~ /^\w+$/i;
+    }
+
+    return ( !$#errors, \@errors );
+}
+
 1;