2 # ============================================================================
3 # Zebra perl API header
4 # =============================================================================
7 # ============================================================================
8 package IDZebra::Filter;
11 use IDZebra::Logger qw(:flags :calls);
17 IDZebra::init(); # ??? Do we need that at all (this is jus nmem init...)
21 # -----------------------------------------------------------------------------
23 # -----------------------------------------------------------------------------
25 my ($proto,$context) = @_;
26 my $class = ref($proto) || $proto;
28 $self->{context} = $context;
29 bless ($self, $class);
33 # -----------------------------------------------------------------------------
35 # -----------------------------------------------------------------------------
40 # print STDERR "LEAK",Devel::Leak::CheckSV($self->{dl}),"\n";
43 # print STDERR "LEAK",Devel::Leak::NoteSV($self->{dl}),"\n";
45 # This is ugly... could be passed as parameters... but didn't work.
47 my $dh = IDZebra::grs_perl_get_dh($self->{context});
48 my $mem = IDZebra::grs_perl_get_mem($self->{context});
49 my $d1 = IDZebra::Data1->get($dh,$mem);
53 eval {$rootnode = $self->process($d1)};
55 logf(LOG_WARN,"Error processing perl filter:%s\n",$@);
58 $rootnode = $self->process($d1);
60 IDZebra::grs_perl_set_res($self->{context},$rootnode);
65 my ($self, $buff) = @_;
66 $self->{_buff} = $buff;
69 # -----------------------------------------------------------------------------
70 # API Template - These methods should be overriden by the implementing class.
71 # -----------------------------------------------------------------------------
73 # This one is called once, when the module is loaded. Not in
74 # object context yet!!!
79 # Just going to return a root node.
80 return ($d1->mk_root('empty'));
83 # -----------------------------------------------------------------------------
85 # -----------------------------------------------------------------------------
87 my ($proto, $file, %args) = @_;
89 # print "Proto:$proto\n";
91 my $class = ref($proto) || $proto;
93 bless ($self, $class);
96 open ($th, $file) || croak ("Cannot open $file");
100 my $m = IDZebra::nmem_create();
101 my $d1=IDZebra::Data1->new($m,$IDZebra::DATA1_FLAG_XML);
102 if ($args{tabPath}) { $d1->tabpath($args{tabPath}); }
103 if ($args{tabRoot}) { $d1->tabroot($args{tabRoot}); }
105 my $rootnode = $self->process($d1);
106 $d1->pr_tree($rootnode);
107 $d1->free_tree($rootnode);
111 $self->{testh} = undef;
115 # -----------------------------------------------------------------------------
117 # -----------------------------------------------------------------------------
119 my ($self, $buff, $len, $offset) = @_;
121 if ($self->{testh}) {
122 return (read($self->{testh},$_[1],$len,$offset));
124 my $r = IDZebra::grs_perl_readf($self->{context},$len);
126 $buff = $self->{_buff};
127 $self->{_buff} = undef;
134 my ($self, $buffsize) = @_;
139 if ($self->{testh}) {
140 $r = read($self->{testh}, $self->{_buff}, $buffsize);
142 $r = IDZebra::grs_perl_readf($self->{context},$buffsize);
145 $res .= $self->{_buff};
146 $self->{_buff} = undef;
154 my ($self, $offset) = @_;
155 if ($self->{testh}) {
156 # I'm not sure if offset is absolute or relative here...
157 return (seek ($self->{testh}, $offset, $0));
159 return (IDZebra::grs_perl_seekf($self->{context},$offset)) ;
165 if ($self->{testh}) {
168 return (IDZebra::grs_perl_seekf($self->{context}));
173 my ($self, $offset) = @_;
174 if ($self->{testh}) {
177 IDZebra::grs_perl_endf($self->{context},$offset);
185 IDZebra::Filter - A superclass of perl filters for Zebra
192 our @ISA=qw(IDZebra::Filter);
202 my $rootnode=$d1->mk_root('meta');
209 When Zebra is trying to index/present a record, needs to extract information from it's source. For some types of input, "hardcoded" procedures are defined, but you have the opportunity to write your own filter code in Tcl or in perl.
211 The perl implementation is nothing but a package, deployed in some available location for Zebra (in I<profilePath>, or in PERL_INCLUDE (@INC)). This package is interpreted once needed, a filter object is created, armored with knowledge enough, to read data from the source, and to generate data1 structures as result. For each individual source "files" the process method is called.
213 This class is supposed to be inherited in all perl filter classes, as it is providing a way of communication between the filter code and the indexing/retrieval process.
215 =head1 IMPLEMENTING FILTERS IN PERL
217 All you have to do is to create a perl package, using and inheriting this one (IDZebra::Filter), and implement the "process" method. The parameter of this call is an IDZebra::Data1 object, representing a data1 handle. You can use this, to reach the configuration information and to build your data structure. What you have to return is a data1 root node. To create it:
219 my $rootnode=$d1->mk_root('meta');
221 where 'meta' is the abstract syntax identifier (in this case Zebra will try to locate meta.abs, and apply it). Then just continue to build the structure. See i<IDZebra::Data1> for details.
223 In order to get the input stream, you can use "virtual" file operators (as the source is not necessairly a file):
225 =item readf($buf,$len,$offset)
227 Going to read $len bytes of data from offset $offset into $buff
229 =item readall($bufflen)
231 Read the entire stream, by reading $bufflen bytes at once
239 Tells the current offset (?)
245 Optionally, you can implement an init call for your class. This call is not going to be called in object, but in class context. Stupid, eh?
247 =head1 TEST YOUR PERL FILTER
249 You can check the functionality of your filter code, by writing a small test program like
253 $res =pod->test($ARGV[0],
254 (tabPath=>'.:../../tab:../../../yaz/tab'));
256 This will try to apply the filter on the file provided as argument, and display the generated data1 structure. However, there are some differences, when running a filter in test mode:
257 - The include path is not applied from tabPath
258 - the tellf, and endf functions are not implemented (just ignored)
260 =head1 CONFIGURE ZEBRA TO USE A PERL FILTER
262 This is quite simple. Read the Zebra manual, and follow the instructions to create your zebra.cfg. For your I<recordType> choose 'grs.perl.<YourFilterClass>'.
263 Copy your filter module (YourFilterClass.pm) to a directory listed in I<profilePath>. i<profilePath> is added to @INC, when interpreting your package: so if you need to load modules from different locations than the default perl include path, just add these directories.
271 Peter Popovics, pop@technomat.hu
275 IDZebra, IDZebra::Data1, Zebra documentation