#!/usr/bin/perl -w

# ----------------------------------------------------------------------------
# idzebra-abs2dom (util/idzebra-abs2dom)
#
# Generate a dom-filter indexing stylesheet based upon an .abs file.
# Should be called either this way
#
#    idzebra-abs2dom something.abs > something.xsl
#
# or in a streaming way
#
#    something | idzebra-abs2dom > something.xsl
#
# The output xslt stylesheet generally needs a little bit of tweaking to be
# ready for indexing. In particular, watch out for the precedence rules of
# xslt templates which work differently from xelm declarations in an .abs file!
#
# Good luck!
#
# NOTE(review): the copy of this script that was reviewed had all XML markup
# stripped from its string literals (the stylesheet header, the per-rule
# template and the closing tag). Those literals were reconstructed from the
# surviving structure; confirm namespaces and element/attribute names against
# the upstream file before relying on the generated stylesheet.
# ----------------------------------------------------------------------------

use strict;
use warnings;

# Namespace prefix used for MARC elements in the generated XPath expressions.
my $marc_prefix = 'marc';

# Dispatch table keyed by the first word of an .abs line.
#   - code reference: called as $handler->($rule, $match), returns an XPath
#   - 0:              declaration that is recognised but deliberately ignored
my $supported_rules = {

    # Supported indexing types:
    'melm'     => \&melm_handler,
    'xelm'     => sub { return $_[1] },

    # Declarations to ignore:
    'attset'   => 0,
    'encoding' => 0,
    'esetname' => 0,
    'marc'     => 0,
    'name'     => 0,
    'xpath'    => 0

};

# Run as a program only when executed directly; when the file is loaded with
# require/do the subs below can be called without touching STDIN/STDOUT.
main() unless caller;


# Entry point: read .abs lines from the files named in @ARGV (or STDIN) and
# write the resulting stylesheet to STDOUT.
sub main {
    print _stylesheet_header();

    while ( my $line = <> ) {
        chomp $line;

        my $template = _template_for_line($line);
        print $template if defined $template;
    }

    print "</xsl:stylesheet>\n";

    return;
}


# Return the fixed opening part of the generated stylesheet.
#
# NOTE(review): reconstructed text -- see the note in the file header.
sub _stylesheet_header {
    return <<END_OF_XSLT;
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:z="http://indexdata.com/zebra-2.0" xmlns:${marc_prefix}="http://www.loc.gov/MARC21/slim" version="1.0">

  <xsl:output indent="yes" method="xml" version="1.0" encoding="UTF-8"/>

  <xsl:template match="/">
    <z:record>
      <xsl:apply-templates/>
    </z:record>
  </xsl:template>

END_OF_XSLT
}


# Translate one .abs line into the text of one xsl:template.
#
# Parameters:
#   $line - a single line of the .abs file (surrounding whitespace is ignored)
#
# Returns the template text, or undef when the line produces no output:
# blank lines, comments, ignored declarations, unsupported rules and
# malformed rules. The last two are also reported on STDERR.
sub _template_for_line {
    my ($line) = @_;

    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    return unless length $line;
    return if $line =~ /^#/;

    # Non-empty and left-trimmed, so the first word always exists.
    my ($rule) = $line =~ /^(\S+)/;

    unless ( defined $supported_rules->{$rule} ) {
        print STDERR "$0: Unsupported indexing rule: '", $rule, "'\n\n";
        return;
    }

    # Anything that is not a code reference marks a declaration to ignore.
    my $handler = $supported_rules->{$rule};
    return unless ref $handler eq 'CODE';

    # "<rule> <match expression> <comma separated index list>": the index
    # list is the last word, the match expression is everything in between
    # (it may itself contain whitespace).
    my ($match, $index) = $line =~ /^\Q$rule\E\s+(.*\S)\s+(\S+)$/;

    unless ( defined $index ) {
        print STDERR "$0: Malformed '$rule' rule, expected",
            " '$rule <match> <indexes>': '", $line, "'\n\n";
        return;
    }

    my $xpath   = $handler->($rule, $match);
    my @indexes = split /,/, $index;

    # To avoid screwing up the match="..." attribute of the template
    # instruction: double quotes would end the attribute early, and raw
    # '&' or '<' are not allowed inside an XML attribute value.
    $xpath =~ s/"/'/g;
    $xpath =~ s/&/&amp;/g;
    $xpath =~ s/</&lt;/g;

    # NOTE(review): reconstructed markup -- see the note in the file header.
    return join '',
        "  <xsl:template match=\"$xpath\">\n",
        "    <z:index name=\"", join( ' ', @indexes ), "\">\n",
        "      <xsl:value-of select=\".\"/>\n",
        "    </z:index>\n",
        "  </xsl:template>\n\n";
}


# Build the XPath expression for a "melm" rule.
#
# Parameters:
#   $rule  - the rule keyword (unused, present for the common handler
#            signature)
#   $match - the melm specification, "<field>" or "<field>$<subfield>"
#
# Returns an XPath string. Fields whose tag starts with "00" select a
# controlfield; all other fields select the subfield elements of a datafield,
# restricted to one code when a subfield was given.
sub melm_handler {
    my ($rule, $match) = @_;
    my ($field, $subfield) = ($match =~ /([^\$]+)\$?(.*)/);

    # The pattern does not match when $match holds nothing but '$' signs;
    # fall back to empty strings instead of tripping over undef below.
    $field    = '' unless defined $field;
    $subfield = '' unless defined $subfield;

    my $xpath = '/*/';

    if ( $field =~ /^00/ ) {
        $xpath .= "${marc_prefix}:controlfield[\@tag='${field}']";
    } else {
        $xpath .= "${marc_prefix}:datafield[\@tag='${field}']/"
                . "${marc_prefix}:subfield";

        if ( $subfield ne '' ) {
            $xpath .= "[\@code='${subfield}']";
        }
    }

    return $xpath;
}