X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=util%2Fabs2dom;h=bfb77d9a4fe3df28f3b0722381417206fe614387;hp=3a3acba40efc1505b90c4c6573ea1eaa00c83db2;hb=33c15f71bd643f902e9ca75847c32942cd04920f;hpb=707cc9ebfa7f462ec58a31cfbad4c1a09fecfa00 diff --git a/util/abs2dom b/util/abs2dom index 3a3acba..bfb77d9 100755 --- a/util/abs2dom +++ b/util/abs2dom @@ -1,16 +1,46 @@ #!/usr/bin/perl -w -# $Id: abs2dom,v 1.2 2007-12-17 12:28:50 sondberg Exp $ # ---------------------------------------------------------------------------- # Generate a dom-filter indexing stylesheet based upon an .abs file +# Should be called either this way +# +# abs2dom something.abs > something.xsl +# +# or in a streaming way +# +# something | abs2dom > something.xsl +# +# The output xslt stylesheet generally needs a little bit of tweaking to be +# ready for indexing. In particular, watch out for the precedence rules of +# xslt templates which work differently from xelm declarations in an .abs file! +# +# Good luck! use strict; +my $marc_prefix = 'marc'; +my $supported_rules = { + + # Supported indexing types: + 'melm' => \&melm_handler, + 'xelm' => sub { return $_[1] }, + + # Declarations to ignore: + 'attset' => 0, + 'encoding' => 0, + 'esetname' => 0, + 'marc' => 0, + 'name' => 0, + 'xpath' => 0 + +}; + print < + xmlns:z="http://indexdata.com/zebra-2.0" + xmlns:$marc_prefix="http://www.loc.gov/MARC21/slim" + version="1.0"> ) { + my $handler = undef; + chomp; s/^\s+//; s/\s+$//; - next unless s/^xelm\s+//; + next unless length; + next if /^#/; + + my ($rule) = (/^(\S+)/); + + if ( defined $supported_rules->{$rule} ) { + $handler = $supported_rules->{$rule}; + + if ( $handler == 0 ) { + next; + } + } else { + print STDERR "$0: Unsupported indexing rule: '", $rule, "\n\n"; + next; + } + + s/^\Q$rule\E\s+//; + my ($index) = (/(\S+)$/); + s/\s+\Q$index\E$//; - my $xpath = $_; + + my $match = $_; + my $xpath = $handler->($rule, $match); my @indexes = split /,/, $index; + # To avoid screwing up the instruction... + $xpath =~ s/"/'/g; + print " \n"; print " \n"; print " \n"; @@ -43,5 +98,27 @@ while (<>) { print " \n\n"; } - print "\n"; + + +sub melm_handler { + my ($rule, $match) = @_; + my ($field, $subfield) = ($match =~ /([^\$]+)\$?(.*)/); + my $xpath = '/*/'; + + if ( $field =~ /^00/ ) { + $xpath .= $marc_prefix . ':controlfield[@tag=\'' . $field . '\']'; + } else { + $xpath .= $marc_prefix . ':datafield[@tag=\'' . $field . '\']/' . + $marc_prefix . ':subfield'; + + if ( $subfield ne '' ) { + $xpath .= '[@code=\'' . $subfield . '\']'; + } + } + + return $xpath; +} + + +