X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=util%2Fabs2dom;h=bfb77d9a4fe3df28f3b0722381417206fe614387;hp=fb20cdcf474318b7f6ca4141b5fd8b5d66159428;hb=33c15f71bd643f902e9ca75847c32942cd04920f;hpb=daca89243af359a79f23b69690d2b39aa6db9b67 diff --git a/util/abs2dom b/util/abs2dom index fb20cdc..bfb77d9 100755 --- a/util/abs2dom +++ b/util/abs2dom @@ -1,31 +1,124 @@ #!/usr/bin/perl -w -# $Id: abs2dom,v 1.1 2007-12-17 11:48:14 sondberg Exp $ # ---------------------------------------------------------------------------- # Generate a dom-filter indexing stylesheet based upon an .abs file +# Should be called either this way +# +# abs2dom something.abs > something.xsl +# +# or in a streaming way +# +# something | abs2dom > something.xsl +# +# The output xslt stylesheet generally needs a little bit of tweaking to be +# ready for indexing. In particular, watch out for the precedence rules of +# xslt templates which work differently from xelm declarations in an .abs file! +# +# Good luck! use strict; -my $xslt_header = < +my $marc_prefix = 'marc'; +my $supported_rules = { + + # Supported indexing types: + 'melm' => \&melm_handler, + 'xelm' => sub { return $_[1] }, + + # Declarations to ignore: + 'attset' => 0, + 'encoding' => 0, + 'esetname' => 0, + 'marc' => 0, + 'name' => 0, + 'xpath' => 0 + +}; + +print < + + + + + + + + END_OF_XSLT while (<>) { + my $handler = undef; + chomp; s/^\s+//; s/\s+$//; - next unless s/^xelm\s+//; - my ($indexes) = (/(\S+)$/); - s/\s+\Q$indexes\E$//; - my $xpath = $_; + next unless length; + next if /^#/; + + my ($rule) = (/^(\S+)/); + + if ( defined $supported_rules->{$rule} ) { + $handler = $supported_rules->{$rule}; + + if ( $handler == 0 ) { + next; + } + } else { + print STDERR "$0: Unsupported indexing rule: '", $rule, "\n\n"; + next; + } + + s/^\Q$rule\E\s+//; + + my ($index) = (/(\S+)$/); + + s/\s+\Q$index\E$//; - print "XPATH='$xpath', INDEX='$indexes'\n"; + my $match = $_; + my $xpath = $handler->($rule, $match); + my @indexes = split /,/, $index; + + # To avoid screwing up the instruction... + $xpath =~ s/"/'/g; + + print " \n"; + print " \n"; + print " \n"; + print " \n"; + print " \n\n"; +} + +print "\n"; + + +sub melm_handler { + my ($rule, $match) = @_; + my ($field, $subfield) = ($match =~ /([^\$]+)\$?(.*)/); + my $xpath = '/*/'; + + if ( $field =~ /^00/ ) { + $xpath .= $marc_prefix . ':controlfield[@tag=\'' . $field . '\']'; + } else { + $xpath .= $marc_prefix . ':datafield[@tag=\'' . $field . '\']/' . + $marc_prefix . ':subfield'; + + if ( $subfield ne '' ) { + $xpath .= '[@code=\'' . $subfield . '\']'; + } + } + + return $xpath; } + + +