From cffc7e022fec2dc52a445299cccf029c2c9c413a Mon Sep 17 00:00:00 2001 From: Mike Taylor Date: Wed, 1 Nov 2006 11:46:10 +0000 Subject: [PATCH] Robust against complex text-nodes containing comments, PIs, etc. --- lib/ZOOM/IRSpy/Utils.pm | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/lib/ZOOM/IRSpy/Utils.pm b/lib/ZOOM/IRSpy/Utils.pm index ca111ce..e52747f 100644 --- a/lib/ZOOM/IRSpy/Utils.pm +++ b/lib/ZOOM/IRSpy/Utils.pm @@ -1,4 +1,4 @@ -# $Id: Utils.pm,v 1.4 2006-11-01 10:13:26 mike Exp $ +# $Id: Utils.pm,v 1.5 2006-11-01 11:46:10 mike Exp $ package ZOOM::IRSpy::Utils; @@ -71,15 +71,29 @@ sub modify_xml_document { print "Attr $key: '", $node->getValue(), "' -> '$value' ($xpath)
\n"; } } elsif ($node->isa("XML::LibXML::Element")) { - my $child = $node->firstChild(); - ### Next line fails if data contains a comment ... *sigh* - die "element child $child is not text" - if !ref $child || !$child->isa("XML::LibXML::Text"); - if ($value ne $child->getData()) { - $child->setData($value); - $nchanges++; - print "Elem $key: '", $child->getData(), "' -> '$value' ($xpath)
\n"; + # The contents could be any mixture of text and + # comments and maybe even other crud such as processing + # instructions. The simplest thing is just to throw it all + # away and start again, making a single Text node the + # canonical representation. But before we do that, + # we'll check whether the element is already + # canonical, to determine whether our change is a + # no-op. + my $old = "???"; + my @children = $node->childNodes(); + if (@children == 1) { + my $child = $node->firstChild(); + if (ref $child && ref $child eq "XML::LibXML::Text") { + $old = $child->getData(); + next if $value eq $old; + } } + + $node->removeChildNodes(); + my $child = new XML::LibXML::Text($value); + $node->appendChild($child); + $nchanges++; + print "Elem $key: '$old' -> '$value' ($xpath)
\n"; } else { warn "unexpected node type $node"; } -- 1.7.10.4