X-Git-Url: http://git.indexdata.com/?p=irspy-moved-to-github.git;a=blobdiff_plain;f=lib%2FZOOM%2FIRSpy%2FRecord.pm;h=165f62e525c6ed7304515a7b596c57c32e6ec253;hp=db1b8659fd8867e8059467d04d7647c80e0ce0d6;hb=4172edcd817f7b978a772fe2b8365e4f1ee8f3bb;hpb=c3fa325a38274107a241d301a40a3c8fd1c21779 diff --git a/lib/ZOOM/IRSpy/Record.pm b/lib/ZOOM/IRSpy/Record.pm index db1b865..165f62e 100644 --- a/lib/ZOOM/IRSpy/Record.pm +++ b/lib/ZOOM/IRSpy/Record.pm @@ -1,22 +1,26 @@ -# $Id: Record.pm,v 1.1 2006-06-20 12:28:26 mike Exp $ +# $Id: Record.pm,v 1.10 2006-07-25 16:53:28 mike Exp $ -package Net::Z3950::IRSpy::Record; +package ZOOM::IRSpy::Record; use 5.008; use strict; use warnings; +use XML::LibXML; +use XML::LibXML::XPathContext; + + =head1 NAME -Net::Z3950::IRSpy::Record - record describing a target for IRSpy +ZOOM::IRSpy::Record - record describing a target for IRSpy =head1 SYNOPSIS - ### To follow + ## To follow =head1 DESCRIPTION -I<### To follow> +I<## To follow> =cut @@ -24,17 +28,122 @@ sub new { my $class = shift(); my($target, $zeerex) = @_; - ### Should compile the ZeeRex record into something useful. 
+ if (!defined $zeerex) { + $zeerex = _empty_zeerex_record($target); + } + + my $parser = new XML::LibXML(); return bless { target => $target, - zeerex => $zeerex, + parser => $parser, + zeerex => $parser->parse_string($zeerex)->documentElement(), }, $class; } +sub _empty_zeerex_record { + my($target) = @_; + + ### Doesn't recognise SRU/SRW URLs + my($host, $port, $db) = ZOOM::IRSpy::_parse_target_string($target); + + return <<__EOT__; + + + $host + $port + $db + + +__EOT__ +} + + +sub append_entry { + my $this = shift(); + my($xpath, $frag) = @_; + + #print STDERR "this=$this, xpath='$xpath', frag='$frag'\n"; + my $root = $this->{zeerex}; # XML::LibXML::Element ISA XML::LibXML::Node + my $xc = XML::LibXML::XPathContext->new($root); + $xc->registerNs(zeerex => "http://explain.z3950.org/dtd/2.0/"); + $xc->registerNs(irspy => "http://indexdata.com/irspy/1.0"); + + my @nodes = $xc->findnodes($xpath); + if (@nodes == 0) { + # Make the node that we're inserting into, if possible. A + # fully general version would work its way through each + # component of the XPath, but for now we just treat it as a + # single chunk to go inside the top-level node. + $this->_half_decent_appendWellBalancedChunk($root, + "<$xpath>"); + @nodes = $xc->findnodes($xpath); + die("still no matches for '$xpath' after creating: can't append") + if @nodes == 0; + } + + ZOOM::Log::log("irspy", + scalar(@nodes), " matches for '$xpath': using first") + if @nodes > 1; + + $this->_half_decent_appendWellBalancedChunk($nodes[0], $frag); +} + + +# *sigh* +# +# _Clearly_ the right way to append a well-balanced chunk of XML to +# a node's children is to call appendWellBalancedChunk() from the +# XML::LibXML::Element class. However, this fails in the common case +# where the ZeeRex record we're working with doesn't declare the +# "irspy" namespace that the inserted fragments use. 
+# +# To my utter astonishment it seems that XML::LibXML (as of version +# 1.58, 31st March 2004) doesn't provide ANY way to register a +# namespace for parsing, which makes the parse_balanced_chunk() +# function that appendWellBalancedChunk() uses effectively useless. +# It _is_ possible to use setNamespace() on a node, to register a new +# namespace mapping for that node -- but that only affects pre-parsed +# trees, and is no use for parsing. Hence the following pair of lines +# DOES NOT WORK: +# $node->setNamespace("http://indexdata.com/irspy/1.0", "irspy", 0); +# $node->appendWellBalancedChunk($frag); +# +# Instead I have to go the long way round, hence this method. I have +# two candidate re-implementations, of which the former is marginally +# less loathsome, but does require that the excess namespace +# declarations be factored out later -- at least, if you want neat +# output. +# +sub _half_decent_appendWellBalancedChunk { + my $this = shift(); + my($node, $frag) = @_; + + if (1) { + $frag =~ s,>, xmlns:irspy="http://indexdata.com/irspy/1.0">,; + $node->appendWellBalancedChunk($frag); + return; + } + + # Instead -- and to call this brain-damaged would be an insult + # to all those fine people out there with actual brain damage + # -- I have to "parse" the XML fragment myself and insert the + # resulting hand-built DOM tree. Someone shoot me now. + my($open, $content, $close) = $frag =~ /^<(.*?)>(.*)<\/(.*?)>$/; + die "can't 'parse' XML fragment '$frag'" + if !defined $open; + my($tag, $attrs) = $open =~ /(.*?)\s(.*)/; + $tag = $open if !defined $tag; + die "mismatched XML start/end <$open>...<$close>" + if $close ne $tag; + print STDERR "tag='$tag', attrs=[$attrs], content='$content'\n"; + die "## no code yet to make DOM node"; +} + + =head1 SEE ALSO -Net::Z3950::IRSpy +ZOOM::IRSpy =head1 AUTHOR