X-Git-Url: http://git.indexdata.com/?p=irspy-moved-to-github.git;a=blobdiff_plain;f=lib%2FZOOM%2FIRSpy%2FUtils.pm;h=7853d20816115e3201b01e0ad747f6c3968d99a8;hp=e24fd6ca569c6478f602f7c74aa98bb87e4c98f8;hb=5be3dea6bacf251661cfa9b3317b3b62e01da295;hpb=7e30dd5f86e43b3e2f19b33d713d413168c7fd6b diff --git a/lib/ZOOM/IRSpy/Utils.pm b/lib/ZOOM/IRSpy/Utils.pm index e24fd6c..7853d20 100644 --- a/lib/ZOOM/IRSpy/Utils.pm +++ b/lib/ZOOM/IRSpy/Utils.pm @@ -1,4 +1,4 @@ -# $Id: Utils.pm,v 1.27 2007-04-27 14:04:40 mike Exp $ +# $Id: Utils.pm,v 1.38 2009-04-15 18:16:45 wosch Exp $ package ZOOM::IRSpy::Utils; @@ -7,7 +7,8 @@ use strict; use warnings; use Exporter 'import'; -our @EXPORT_OK = qw(isodate +our @EXPORT_OK = qw(utf8param + isodate xml_encode cql_quote cql_target @@ -21,11 +22,77 @@ our @EXPORT_OK = qw(isodate use XML::LibXML; use XML::LibXML::XPathContext; +use Encode; +use Encode qw(is_utf8); + our $IRSPY_NS = 'http://indexdata.com/irspy/1.0'; +# Under Apache 2/mod_perl 2, the ubiquitous $r is no longer an +# Apache::Request object, nor even an Apache2::Request, but an +# Apache2::RequestRec ... which, astonishingly, doesn't have the +# param() method. So if we're given one of these things, we need to +# make an Apache2::Request out of it, which at least isn't too hard. +# However *sigh* this may not be a cheap operation, so we keep a cache +# of already-made Request objects. 
+# +my %_apache2request; +my %_paramsbyrequest; # Used for Apache2 only +sub utf8param { + my($r, $key, $value) = @_; + + if ($r->isa('Apache2::RequestRec')) { + # Running under Apache2 + if (defined $_apache2request{$r}) { + #warn "using existing Apache2::RequestReq for '$r'"; + $r = $_apache2request{$r}; + } else { + require Apache2::Request; + #warn "making new Apache2::RequestReq for '$r'"; + $r = $_apache2request{$r} = new Apache2::Request($r); + } + } + + if (!defined $key) { + return map { decode_utf8($_) } $r->param(); + } + + my $raw = undef; + $raw = $_paramsbyrequest{$r}->{$key} if $r->isa('Apache2::Request'); + $raw = $r->param($key) if !defined $raw; + + if (defined $value) { + # Argh! Simply writing through to the underlying method + # param() won't work in Apache2, where param() is readonly. + # So we have to keep a hash of additional values, which we + # consult (above) before the actual parameters. Ouch ouch. + if ($r->isa('Apache2::Request')) { + $_paramsbyrequest{$r}->{$key} = encode_utf8($value); + } else { + $r->param($key, encode_utf8($value)); + } + } + + return undef if !defined $raw; + my $cooked = decode_utf8($raw); + warn "converted '$raw' to '", $cooked, "'\n" if $cooked ne $raw; + return $cooked; +} + # Utility functions follow, exported for use of web UI +sub utf8param_apache1 { + my($r, $key, $value) = @_; + die "utf8param() called with value '$value'" if defined $value; + + my $raw = $r->param($key); + return undef if !defined $raw; + my $cooked = decode_utf8($raw); + warn "converted '$raw' to '", $cooked, "'\n" if $cooked ne $raw; + return $cooked; +} + + sub isodate { my($time) = @_; @@ -70,7 +137,7 @@ sub xml_encode { sub cql_quote { my($term) = @_; - $term =~ s/([""\\])/\\$1/g; + $term =~ s/([""\\*?])/\\$1/g; $term = qq["$term"] if $term =~ /[\s""\/]/; return $term; } @@ -181,11 +248,27 @@ sub irspy_record2identifier { # a null transform; now we have to be a bit cleverer. 
# sub irspy_identifier2target { + my $res = _irspy_identifier2target(@_); + #carp "converted ID '@_' to target '$res'"; + return $res; +} + +sub _irspy_identifier2target { my($id) = @_; + confess "_irspy_identifier2target(): id is undefined" + if !defined $id; + my($protocol, $target) = ($id =~ /(.*?):(.*)/); - print STDERR "protocol='$protocol', target='$target'\n"; - ### This assumes everything is Z39.50 + if (uc($protocol) eq "Z39.50") { + return "tcp:$target"; + } elsif (uc($protocol) eq "SRU") { + return "sru=get,http:$target"; + } elsif (uc($protocol) eq "SRW") { + return "sru=srw,http:$target"; + } + + warn "_irspy_identifier2target($id): unrecognised protocol '$protocol'"; return $target; } @@ -219,21 +302,22 @@ sub modify_xml_document { # we'll check whether the element is already # canonical, to determine whether our change is a # no-op. - my $old = "???"; + my $old = ""; my @children = $node->childNodes(); if (@children == 1) { my $child = $node->firstChild(); if (ref $child && ref $child eq "XML::LibXML::Text") { $old = $child->getData(); - next if $value eq $old; + #print STDERR "child='$child', old=", _renderchars($old), "\n" if $key eq "title"; } } + next if $value eq $old; $node->removeChildNodes(); my $child = new XML::LibXML::Text($value); $node->appendChild($child); push @changes, $ref; - #print "Elem $key: '$old' -> '$value' ($xpath)
\n"; + #print STDERR "Elem $key ($xpath): ", _renderchars($old), " -> '", _renderchars($value), "\n"; } else { warn "unexpected node type $node"; } @@ -251,6 +335,13 @@ sub modify_xml_document { } +sub _renderchars { + my($text) = @_; + + return "'" . $text . "'", " (", join(" ", map {ord($_)} split //, $text), "), is_utf8=" , is_utf8($text); +} + + sub dom_add_node { my($xc, $ppath, $selector, $value, @addAfter) = @_; @@ -362,7 +453,7 @@ sub inheritance_tree { # This function is made available in xslt using the register_function call sub xslt_strcmp { my ($arg1, $arg2) = @_; - return ($arg1->to_literal()) cmp ($arg2->to_literal()); + return "$arg1" cmp "$arg2"; }