X-Git-Url: http://git.indexdata.com/?p=irspy-moved-to-github.git;a=blobdiff_plain;f=lib%2FZOOM%2FIRSpy%2FUtils.pm;h=2dbbe131accc966ede26ee8c141fd0b96f8bec9e;hp=5db0e331c7e4dbad71fde3c70649f83b6084e908;hb=68379b31e731a6cdd942e99bb339bafe1a4c8dc1;hpb=5cc7c7423877cc60242402f6a33553d5447d8457 diff --git a/lib/ZOOM/IRSpy/Utils.pm b/lib/ZOOM/IRSpy/Utils.pm index 5db0e33..2dbbe13 100644 --- a/lib/ZOOM/IRSpy/Utils.pm +++ b/lib/ZOOM/IRSpy/Utils.pm @@ -1,4 +1,4 @@ -# $Id: Utils.pm,v 1.26 2007-03-19 18:51:03 mike Exp $ +# $Id: Utils.pm,v 1.33 2007-06-27 10:44:57 mike Exp $ package ZOOM::IRSpy::Utils; @@ -7,22 +7,41 @@ use strict; use warnings; use Exporter 'import'; -our @EXPORT_OK = qw(isodate +our @EXPORT_OK = qw(utf8param + isodate xml_encode cql_quote cql_target irspy_xpath_context + irspy_make_identifier + irspy_record2identifier + irspy_identifier2target modify_xml_document bib1_access_point render_record); use XML::LibXML; use XML::LibXML::XPathContext; +use Encode; +use Encode qw(is_utf8); + our $IRSPY_NS = 'http://indexdata.com/irspy/1.0'; # Utility functions follow, exported for use of web UI +sub utf8param { + my($r, $key, $value) = @_; + die "utf8param() called with value '$value'" if defined $value; + + my $raw = $r->param($key); + return undef if !defined $raw; + my $cooked = decode_utf8($raw); + warn "converted '$raw' to '", $cooked, "'\n" if $cooked ne $raw; + return $cooked; +} + + sub isodate { my($time) = @_; @@ -67,7 +86,7 @@ sub xml_encode { sub cql_quote { my($term) = @_; - $term =~ s/([""\\])/\\$1/g; + $term =~ s/([""\\*?])/\\$1/g; $term = qq["$term"] if $term =~ /[\s""\/]/; return $term; } @@ -76,10 +95,16 @@ sub cql_quote { # Makes a CQL query that finds a specified target. Arguments may be # either an ID alone, or a (host, port, db) triple. sub cql_target { - my($host, $port, $db) = @_; + my($protocol, $host, $port, $db) = @_; - $host .= ":$port/$db" if defined $port; - return "rec.id=" . 
cql_quote($host); + my $id; + if (defined $host) { + $id = irspy_make_identifier($protocol, $host, $port, $db); + } else { + $id = $protocol; + } + + return "rec.id=" . cql_quote($id); } @@ -127,6 +152,73 @@ sub irspy_xpath_context { } +# Construct an opaque identifier from its components. Although it's +# trivial, this is needed in so many places that it really needs to be +# factored out. +# +# This is the converse of _parse_target_string() in IRSpy.pm, which +# should be renamed and moved into this package. +# +sub irspy_make_identifier { + my($protocol, $host, $port, $dbname) = @_; + + die "irspy_make_identifier(" . join(", ", map { "'$_'" } @_). + "): wrong number of arguments" if @_ != 4; + + die "irspy_make_identifier(): protocol undefined" if !defined $protocol; + die "irspy_make_identifier(): host undefined" if !defined $host; + die "irspy_make_identifier(): port undefined" if !defined $port; + die "irspy_make_identifier(): dbname undefined" if !defined $dbname; + + return "$protocol:$host:$port/$dbname"; +} + + +# Returns the opaque identifier of an IRSpy record based on the +# XPathContext'ed DOM object, as returned by irspy_xpath_context(). +# This is doing the same thing as irspy_make_identifier() but from a +# record rather than a set of parameters. +# +sub irspy_record2identifier { + my($xc) = @_; + + ### Must be kept the same as is used in ../../../zebra/*.xsl + return $xc->find("concat(e:serverInfo/\@protocol, ':', + e:serverInfo/e:host, ':', + e:serverInfo/e:port, '/', + e:serverInfo/e:database)"); +} + + +# Transforms an IRSpy opaque identifier, as returned from +# irspy_make_identifier() or irspy_record2identifier(), into a YAZ +# target-string suitable for feeding to ZOOM. Before we introduced +# the protocol element at the start of the identifier string, this was +# a null transform; now we have to be a bit cleverer. 
+# +sub irspy_identifier2target { + my $res = _irspy_identifier2target(@_); + #carp "converted ID '@_' to target '$res'"; + return $res; +} + +sub _irspy_identifier2target { + my($id) = @_; + + my($protocol, $target) = ($id =~ /(.*?):(.*)/); + if (uc($protocol) eq "Z39.50") { + return "tcp:$target"; + } elsif (uc($protocol) eq "SRU") { + return "sru=get,http:$target"; + } elsif (uc($protocol) eq "SRW") { + return "sru=srw,http:$target"; + } + + warn "unrecognised protocol '$protocol' in ID $id"; + return $target; +} + + sub modify_xml_document { my($xc, $fieldsByKey, $data) = @_; @@ -156,21 +248,23 @@ sub modify_xml_document { # we'll check whether the element is already # canonical, to determine whether our change is a # no-op. - my $old = "???"; + my $old = ""; my @children = $node->childNodes(); if (@children == 1) { my $child = $node->firstChild(); if (ref $child && ref $child eq "XML::LibXML::Text") { $old = $child->getData(); - next if $value eq $old; + print STDERR "child='$child', old=", _renderchars($old), "\n" + if $key eq "title"; } } + next if $value eq $old; $node->removeChildNodes(); my $child = new XML::LibXML::Text($value); $node->appendChild($child); push @changes, $ref; - #print "Elem $key: '$old' -> '$value' ($xpath)
\n"; + print STDERR "Elem $key ($xpath): ", _renderchars($old), " -> '", _renderchars($value), "\n"; } else { warn "unexpected node type $node"; } @@ -188,6 +282,13 @@ sub modify_xml_document { } +sub _renderchars { + my($text) = @_; + + return "'" . $text . "'", " (", join(" ", map {ord($_)} split //, $text), "), is_utf8=" , is_utf8($text); +} + + sub dom_add_node { my($xc, $ppath, $selector, $value, @addAfter) = @_;