#!/usr/bin/perl -w
# ----------------------------------------------------------------------------
# Generate a dom-filter indexing stylesheet based upon an .abs file
#
# Should be called either this way
#
#   idzebra-abs2dom something.abs > something.xsl
#
# or in a streaming way
#
#   something | idzebra-abs2dom > something.xsl
#
# The output xslt stylesheet generally needs a little bit of tweaking to be
# ready for indexing. In particular, watch out for the precedence rules of
# xslt templates which work differently from xelm declarations in an .abs file!
#
# Good luck!
#
# NOTE(review): this file had been collapsed onto a single line and the XML
# markup inside its string literals had been stripped.  The line structure
# has been restored and the XSLT markup has been reconstructed following the
# Zebra DOM filter conventions (z:record / z:index) -- please compare the
# emitted markup against the upstream idzebra-abs2dom before relying on it.

use strict;
use warnings;

# Namespace prefix used for MARCXML elements in the generated XPath
# expressions and declared in the stylesheet header.
my $marc_prefix = 'marc';

# Dispatch table keyed by the first word of an .abs line.
#   code ref => supported indexing rule; called as $handler->($rule, $match)
#               and expected to return the XPath used as template match
#   0        => known declaration that is silently ignored
my %handler_for = (
    # Supported indexing types:
    'melm' => \&melm_handler,
    'xelm' => sub { return $_[1] },    # xelm patterns are used as-is

    # Declarations to ignore:
    'attset'   => 0,
    'encoding' => 0,
    'esetname' => 0,
    'marc'     => 0,
    'name'     => 0,
    'xpath'    => 0,
);

# Run only when executed as a script, so that the file can also be loaded
# (e.g. by a test) without consuming STDIN.
main() unless caller;

# Read .abs declarations from the files named on the command line (or from
# STDIN) and print the corresponding XSLT stylesheet to STDOUT.  Problems
# with individual lines are reported on STDERR and the line is skipped.
sub main {
    # NOTE(review): header reconstructed -- the original heredoc body was lost.
    print <<"END_OF_XSLT";
<?xml version="1.0"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:z="http://indexdata.com/zebra-2.0"
                xmlns:$marc_prefix="http://www.loc.gov/MARC21/slim"
                version="1.0">

  <xsl:output indent="yes" method="xml" version="1.0" encoding="UTF-8"/>

  <xsl:template match="/">
    <z:record>
      <xsl:apply-templates/>
    </z:record>
  </xsl:template>

END_OF_XSLT

    LINE:
    while ( my $line = <> ) {
        chomp $line;
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        next LINE unless length $line;
        next LINE if $line =~ /^#/;

        # First word is the rule name, everything else is its arguments.
        my ( $rule, $rest ) = split /\s+/, $line, 2;

        if ( !exists $handler_for{$rule} ) {
            print STDERR "$0: Unsupported indexing rule: '$rule'\n\n";
            next LINE;
        }

        # Known declaration with no handler: nothing to generate.
        my $handler = $handler_for{$rule};
        next LINE unless ref $handler eq 'CODE';

        # The last word is the comma-separated index list; whatever precedes
        # it (which may itself contain blanks) is the match pattern.
        my ( $match, $index ) =
            defined $rest ? $rest =~ /\A(.*\S)\s+(\S+)\z/ : ();
        if ( !defined $index ) {
            print STDERR "$0: Malformed '$rule' declaration, expected ",
                "'$rule <pattern> <indexes>': '$line'\n\n";
            next LINE;
        }

        my $xpath = $handler->( $rule, $match );
        if ( !defined $xpath ) {
            print STDERR "$0: Cannot interpret '$rule' pattern: '$match'\n\n";
            next LINE;
        }

        my $index_names = join ' ', grep { length } split /,/, $index;

        # The match attribute below is delimited by double quotes, so any
        # double quote inside the XPath is replaced by a single quote.
        $xpath =~ s/"/'/g;

        # NOTE(review): template markup reconstructed -- see file header.
        print qq{  <xsl:template match="$xpath">\n};
        print qq{    <z:index name="$index_names">\n};
        print qq{      <xsl:value-of select="."/>\n};
        print qq{    </z:index>\n};
        print qq{  </xsl:template>\n\n};
    }

    print "</xsl:stylesheet>\n";

    return;
}

# Translate a melm pattern ("TAG" or "TAG\$SUBFIELD") into an XPath expression
# over MARCXML.
#
#   $rule  - rule name (unused, part of the common handler signature)
#   $match - the melm pattern, e.g. '245$a', '100' or '001'
#
# Tags starting with "00" map to controlfield elements (any subfield part is
# ignored); all other tags map to the subfield children of the datafield,
# restricted to one code when a subfield is given.  Returns undef when no tag
# can be extracted from the pattern.
sub melm_handler {
    my ( $rule, $match ) = @_;

    my ( $field, $subfield ) = ( $match =~ /([^\$]+)\$?(.*)/ );
    return unless defined $field;

    my $xpath = '/*/';
    if ( $field =~ /^00/ ) {
        $xpath .= $marc_prefix . ':controlfield[@tag=\'' . $field . '\']';
    }
    else {
        $xpath .= $marc_prefix . ':datafield[@tag=\'' . $field . '\']/'
            . $marc_prefix . ':subfield';
        if ( $subfield ne '' ) {
            $xpath .= '[@code=\'' . $subfield . '\']';
        }
    }

    return $xpath;
}

# True value so the file can also be loaded with require.
1;