2 # ============================================================================
3 # Zebra perl API header
4 # =============================================================================
7 # ============================================================================
8 package IDZebra::Filter;
11 use IDZebra::Logger qw(:flags :calls);
12 use Symbol qw(gensym);
18 IDZebra::init(); # ??? Do we need that at all (this is jus nmem init...)
22 # -----------------------------------------------------------------------------
24 # -----------------------------------------------------------------------------
26 my ($proto,$context) = @_;
27 my $class = ref($proto) || $proto;
29 $self->{context} = $context;
30 bless ($self, $class);
34 # -----------------------------------------------------------------------------
36 # -----------------------------------------------------------------------------
41 # print STDERR "LEAK",Devel::Leak::CheckSV($self->{dl}),"\n";
44 # print STDERR "LEAK",Devel::Leak::NoteSV($self->{dl}),"\n";
46 # This is ugly... could be passed as parameters... but didn't work.
48 my $dh = IDZebra::grs_perl_get_dh($self->{context});
49 my $mem = IDZebra::grs_perl_get_mem($self->{context});
50 my $d1 = IDZebra::Data1->get($dh,$mem);
54 eval {$rootnode = $self->process($d1)};
56 logf(LOG_WARN,"Error processing perl filter:%s\n",$@);
59 $rootnode = $self->process($d1);
61 IDZebra::grs_perl_set_res($self->{context},$rootnode);
66 my ($self, $buff) = @_;
67 $self->{_buff} = $buff;
70 # -----------------------------------------------------------------------------
71 # API Template - These methods should be overriden by the implementing class.
72 # -----------------------------------------------------------------------------
74 # This one is called once, when the module is loaded. Not in
75 # object context yet!!!
80 # Just going to return a root node.
81 return ($d1->mk_root('empty'));
84 # -----------------------------------------------------------------------------
86 # -----------------------------------------------------------------------------
88 my ($proto, $file, %args) = @_;
90 my $class = ref($proto) || $proto;
92 bless ($self, $class);
95 open ($th, $file) || croak ("Cannot open $file");
99 my $m = IDZebra::nmem_create();
100 my $d1=IDZebra::Data1->new($m,$IDZebra::DATA1_FLAG_XML);
101 if ($args{tabPath}) { $d1->tabpath($args{tabPath}); }
102 if ($args{tabRoot}) { $d1->tabroot($args{tabRoot}); }
104 my $rootnode = $self->process($d1);
105 $d1->pr_tree($rootnode);
106 $d1->free_tree($rootnode);
110 $self->{testh} = undef;
114 # -----------------------------------------------------------------------------
116 # -----------------------------------------------------------------------------
118 my ($self, $buff, $len, $offset) = @_;
120 if ($self->{testh}) {
121 return (read($self->{testh},$_[1],$len,$offset));
123 my $r = IDZebra::grs_perl_readf($self->{context},$len);
125 $buff = $self->{_buff};
126 $self->{_buff} = undef;
135 my $r = IDZebra::grs_perl_readline($self->{context});
137 my $buff = $self->{_buff};
138 $self->{_buff} = undef;
146 return(IDZebra::grs_perl_getc($self->{context}));
152 tie (*$fh,'IDZebra::FilterFile', $self);
157 my ($self, $buffsize) = @_;
162 if ($self->{testh}) {
163 $r = read($self->{testh}, $self->{_buff}, $buffsize);
165 $r = IDZebra::grs_perl_readf($self->{context},$buffsize);
168 $res .= $self->{_buff};
169 $self->{_buff} = undef;
177 my ($self, $offset) = @_;
178 if ($self->{testh}) {
179 # I'm not sure if offset is absolute or relative here...
180 return (seek ($self->{testh}, $offset, $0));
182 return (IDZebra::grs_perl_seekf($self->{context},$offset)) ;
188 if ($self->{testh}) {
191 return (IDZebra::grs_perl_seekf($self->{context}));
196 my ($self, $offset) = @_;
197 if ($self->{testh}) {
200 IDZebra::grs_perl_endf($self->{context},$offset);
203 # ----------------------------------------------------------------------------
204 # The 'virtual' filehandle for zebra extract calls
205 # ----------------------------------------------------------------------------
206 package IDZebra::FilterFile;
209 our @ISA = qw(Tie::Handle);
214 bless ($self, $class);
215 $self->{filter} = shift;
221 return ($self->{filter}->readf(@_));
226 return ($self->{filter}->readline());
231 return ($self->{filter}->getc());
235 croak ("EOF not implemented");
239 croak ("TELL not implemented");
243 croak ("SEEK not implemented");
255 IDZebra::Filter - A superclass of perl filters for Zebra
262 our @ISA=qw(IDZebra::Filter);
272 my $rootnode=$d1->mk_root('meta');
279 When Zebra is trying to index/present a record, needs to extract information from it's source. For some types of input, "hardcoded" procedures are defined, but you have the opportunity to write your own filter code in Tcl or in perl.
281 The perl implementation is nothing but a package, deployed in some available location for Zebra (in I<profilePath>, or in PERL_INCLUDE (@INC)). This package is interpreted once needed, a filter object is created, armored with knowledge enough, to read data from the source, and to generate data1 structures as result. For each individual source "files" the process method is called.
283 This class is supposed to be inherited in all perl filter classes, as it is providing a way of communication between the filter code and the indexing/retrieval process.
285 =head1 IMPLEMENTING FILTERS IN PERL
287 All you have to do is to create a perl package, using and inheriting this one (IDZebra::Filter), and implement the "process" method. The parameter of this call is an IDZebra::Data1 object, representing a data1 handle. You can use this, to reach the configuration information and to build your data structure. What you have to return is a data1 root node. To create it:
289 my $rootnode=$d1->mk_root('meta');
291 where 'meta' is the abstract syntax identifier (in this case Zebra will try to locate meta.abs, and apply it). Then just continue to build the structure. See i<IDZebra::Data1> for details.
293 In order to get the input stream, you can use "virtual" file operators (as the source is not necessairly a file):
295 =item readf($buf,$len,$offset)
297 Going to read $len bytes of data from offset $offset into $buff
299 =item readall($bufflen)
301 Read the entire stream, by reading $bufflen bytes at once
309 Tells the current offset (?)
315 Optionally, you can implement an init call for your class. This call is not going to be called in object, but in class context. Stupid, eh?
317 =head1 TEST YOUR PERL FILTER
319 You can check the functionality of your filter code, by writing a small test program like
323 $res =pod->test($ARGV[0],
324 (tabPath=>'.:../../tab:../../../yaz/tab'));
326 This will try to apply the filter on the file provided as argument, and display the generated data1 structure. However, there are some differences, when running a filter in test mode:
327 - The include path is not applied from tabPath
328 - the tellf, and endf functions are not implemented (just ignored)
330 =head1 CONFIGURE ZEBRA TO USE A PERL FILTER
332 This is quite simple. Read the Zebra manual, and follow the instructions to create your zebra.cfg. For your I<recordType> choose 'grs.perl.<YourFilterClass>'.
333 Copy your filter module (YourFilterClass.pm) to a directory listed in I<profilePath>. i<profilePath> is added to @INC, when interpreting your package: so if you need to load modules from different locations than the default perl include path, just add these directories.
341 Peter Popovics, pop@technomat.hu
345 IDZebra, IDZebra::Data1, Zebra documentation