-# $Id: Utils.pm,v 1.23 2007-03-01 13:52:54 mike Exp $
+# $Id: Utils.pm,v 1.37 2007-12-12 11:02:37 mike Exp $
package ZOOM::IRSpy::Utils;
use warnings;
use Exporter 'import';
-our @EXPORT_OK = qw(isodate
+our @EXPORT_OK = qw(utf8param
+ isodate
xml_encode
cql_quote
cql_target
irspy_xpath_context
+ irspy_make_identifier
+ irspy_record2identifier
+ irspy_identifier2target
modify_xml_document
- bib1_access_point);
+ bib1_access_point
+ render_record);
use XML::LibXML;
use XML::LibXML::XPathContext;
+use Encode;
+use Encode qw(is_utf8);
+
our $IRSPY_NS = 'http://indexdata.com/irspy/1.0';
# Utility functions follow, exported for use of web UI
+# Fetches the request parameter named $key from $r and returns it
+# decoded from UTF-8.  Returns undef when the parameter is absent.
+# This is a read-only accessor: passing a third argument is an error.
+sub utf8param {
+    my($r, $key, $value) = @_;
+    if (defined $value) {
+        die "utf8param() called with value '$value'";
+    }
+
+    my $raw = $r->param($key);
+    unless (defined $raw) {
+        return undef;
+    }
+
+    # Log whenever decoding actually changed the string.
+    my $cooked = decode_utf8($raw);
+    if ($cooked ne $raw) {
+        warn "converted '$raw' to '", $cooked, "'\n";
+    }
+    return $cooked;
+}
+
+
sub isodate {
my($time) = @_;
sub cql_quote {
my($term) = @_;
+ # New revision: backslash-escape every double quote, backslash, "*"
+ # and "?" in the term, then wrap the whole term in double quotes when
+ # it contains whitespace, a double quote or a slash.  (The double
+ # quote is listed twice in each character class; that is harmless.)
+ # NOTE(review): an empty term, or one containing characters such as
+ # "=", "<", ">", "(" or ")", is returned unquoted -- confirm that
+ # callers never pass such terms.
- $term =~ s/([""\\])/\\$1/g;
- $term = qq["$term"] if $term =~ /\s/;
+ $term =~ s/([""\\*?])/\\$1/g;
+ $term = qq["$term"] if $term =~ /[\s""\/]/;
return $term;
}
-# Makes a CQL query that finds a specified target
+# Makes a CQL query that finds a specified target. Arguments may be
+# either an ID alone, or a (protocol, host, port, db) quadruple.
sub cql_target {
- my($host, $port, $db) = @_;
+ my($protocol, $host, $port, $db) = @_;
- return ("host=" . cql_quote($host) . " and " .
- "port=" . cql_quote($port) . " and " .
- "path=" . cql_quote($db));
+ # When a second argument is supplied, all four arguments are combined
+ # into an identifier by irspy_make_identifier(); otherwise the first
+ # argument is taken to be the identifier itself.
+ my $id;
+ if (defined $host) {
+ $id = irspy_make_identifier($protocol, $host, $port, $db);
+ } else {
+ $id = $protocol;
+ }
+
+ # The query compares rec.id with the identifier, quoted by cql_quote().
+ return "rec.id=" . cql_quote($id);
}
sub irspy_xpath_context {
my($record) = @_;
- my $xml = ref $record ? $record->render() : $record;
- my $parser = new XML::LibXML();
- my $doc = $parser->parse_string($xml);
- my $root = $doc->getDocumentElement();
+ if (ref $record && $record->isa("ZOOM::Record")) {
+ $record = $record->render();
+ }
+
+ my $root;
+ if (ref $record) {
+ $root = $record;
+ } else {
+ my $parser = new XML::LibXML();
+ my $doc = $parser->parse_string($record);
+ $root = $doc->getDocumentElement();
+ }
+
my $xc = XML::LibXML::XPathContext->new($root);
foreach my $prefix (keys %_namespaces) {
$xc->registerNs($prefix, $_namespaces{$prefix});
}
+# Construct an opaque identifier from its components. Although it's
+# trivial, this is needed in so many places that it really needs to be
+# factored out.
+#
+# This is the converse of _parse_target_string() in IRSpy.pm, which
+# should be renamed and moved into this package.
+#
+sub irspy_make_identifier {
+    # Exactly four arguments are required; the message lists whatever
+    # was actually passed.
+    if (@_ != 4) {
+        die "irspy_make_identifier(" . join(", ", map { "'$_'" } @_).
+            "): wrong number of arguments";
+    }
+
+    # Every component must be defined; they are checked in argument order.
+    my @names = qw(protocol host port dbname);
+    my %component;
+    @component{@names} = @_;
+    foreach my $name (@names) {
+        if (!defined $component{$name}) {
+            die "irspy_make_identifier(): $name undefined";
+        }
+    }
+
+    # Result has the form protocol:host:port/dbname
+    return join(":", @component{qw(protocol host port)}) .
+        "/" . $component{dbname};
+}
+
+
+# Returns the opaque identifier of an IRSpy record based on the
+# XPathContext'ed DOM object, as returned by irspy_xpath_context().
+# This is doing the same thing as irspy_make_identifier() but from a
+# record rather than a set of parameters.
+#
+sub irspy_record2identifier {
+    my($xc) = @_;
+
+    # The identifier is assembled inside XPath itself, as
+    # protocol:host:port/database taken from the e:serverInfo element.
+    ### Must be kept the same as is used in ../../../zebra/*.xsl
+    my $xpath = "concat(e:serverInfo/\@protocol, ':',
+ e:serverInfo/e:host, ':',
+ e:serverInfo/e:port, '/',
+ e:serverInfo/e:database)";
+
+    return $xc->find($xpath);
+}
+
+
+# Transforms an IRSpy opaque identifier, as returned from
+# irspy_make_identifier() or irspy_record2identifier(), into a YAZ
+# target-string suitable for feeding to ZOOM. Before we introduced
+# the protocol element at the start of the identifier string, this was
+# a null transform; now we have to be a bit cleverer.
+#
+sub irspy_identifier2target {
+    # Public entry point: hands all arguments to the private worker and
+    # passes its answer back.  The disabled carp below is a tracing hook.
+    my $target = _irspy_identifier2target(@_);
+    #carp "converted ID '@_' to target '$target'";
+    return $target;
+}
+
+# Worker for irspy_identifier2target(): converts an identifier of the
+# form protocol:rest into a target string by replacing the protocol
+# with the matching prefix.  Dies with a stack trace if $id is
+# undefined.  Warns and returns the part after the first colon when the
+# protocol is not recognised; warns and returns undef when $id
+# contains no colon at all.
+sub _irspy_identifier2target {
+    my($id) = @_;
+
+    # Carp is loaded here and confess() is called by its full name, so
+    # this check works whether or not the file imports Carp.
+    require Carp;
+    Carp::confess("_irspy_identifier2target(): id is undefined")
+        if !defined $id;
+
+    # Split at the first colon: the protocol is what precedes it.
+    my($protocol, $target) = ($id =~ /(.*?):(.*)/);
+    if (!defined $protocol) {
+        # No colon, so there is no protocol to inspect.  Say so once,
+        # instead of tripping "uninitialized value" warnings in the
+        # lookups below.  The result stays undefined, as it always was.
+        warn "_irspy_identifier2target($id): no protocol found";
+        return undef;
+    }
+
+    # Target-string prefix for each protocol; matched case-insensitively.
+    my %prefix_for = (
+        "Z39.50" => "tcp:",
+        "SRU"    => "sru=get,http:",
+        "SRW"    => "sru=srw,http:",
+    );
+    my $prefix = $prefix_for{uc($protocol)};
+    return $prefix . $target if defined $prefix;
+
+    warn "_irspy_identifier2target($id): unrecognised protocol '$protocol'";
+    return $target;
+}
+
+
sub modify_xml_document {
my($xc, $fieldsByKey, $data) = @_;
# we'll check whether the element is already
# canonical, to determine whether our change is a
# no-op.
- my $old = "???";
+ my $old = "";
my @children = $node->childNodes();
if (@children == 1) {
my $child = $node->firstChild();
if (ref $child && ref $child eq "XML::LibXML::Text") {
$old = $child->getData();
- next if $value eq $old;
+ #print STDERR "child='$child', old=", _renderchars($old), "\n" if $key eq "title";
}
}
+ next if $value eq $old;
$node->removeChildNodes();
my $child = new XML::LibXML::Text($value);
$node->appendChild($child);
push @changes, $ref;
- #print "Elem $key: '$old' -> '$value' ($xpath)<br/>\n";
+ #print STDERR "Elem $key ($xpath): ", _renderchars($old), " -> '", _renderchars($value), "\n";
} else {
warn "unexpected node type $node";
}
}
+# Debugging aid: renders $text as a single string showing the text in
+# quotes, the ord() of each character, and the value of is_utf8().
+sub _renderchars {
+    my($text) = @_;
+
+    # Build one string rather than returning a comma list: as a list the
+    # result was only correct in list context (e.g. as print arguments);
+    # in scalar context only the trailing is_utf8 flag came back.
+    my $codes = join(" ", map { ord($_) } split //, $text);
+    return "'" . $text . "'" . " (" . $codes . "), is_utf8=" . is_utf8($text);
+}
+
+
sub dom_add_node {
my($xc, $ppath, $selector, $value, @addAfter) = @_;
# This function is made available in xslt using the register_function call
sub xslt_strcmp {
my ($arg1, $arg2) = @_;
+ # New revision: each argument is stringified by interpolation instead
+ # of by calling to_literal() on it, and the two strings are compared
+ # with cmp, giving -1, 0 or 1.
- return ($arg1->to_literal()) cmp ($arg2->to_literal());
+ return "$arg1" cmp "$arg2";
}
}
+# Fetches record number $which from the result set $rs using the
+# element-set name $elementSetName, and returns the rendered record.
+# The result set's previous elementSetName option is put back
+# afterwards, including when fetching the record throws.
+sub render_record {
+    my($rs, $which, $elementSetName) = @_;
+
+    # There is a slight race condition here on the element-set name,
+    # but it shouldn't be a problem as this is (currently) only called
+    # from parts of the program that run single-threaded.
+    # NOTE(review): this relies on option() returning the value that
+    # was in force before the call -- confirm against ZOOM::ResultSet.
+    my $old = $rs->option(elementSetName => $elementSetName);
+
+    # Trap any exception from record() so the option is restored before
+    # the error is passed on; otherwise a failed fetch would leave the
+    # temporary element-set name in place on the result set.
+    my $rec;
+    my $ok = eval { $rec = $rs->record($which); 1 };
+    my $err = $@;
+    $rs->option(elementSetName => $old);
+    die $err if !$ok;
+
+    return $rec->render();
+}
+
+
1;