From: pop Date: Fri, 28 Feb 2003 16:17:42 +0000 (+0000) Subject: Perl filters now can optionally reach data via a virtual filehandle. X-Git-Tag: ZEBRA.1.3.8~62 X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=commitdiff_plain;h=0a34a94a93dc7d594d0f6a7f359a63dbc1297775 Perl filters now can optionally reach data via a virtual filehandle. Enhanced features on the api (like searching :)) Bug fixes --- diff --git a/perl/IDZebra.i b/perl/IDZebra.i index e289a54..bbbdcff 100644 --- a/perl/IDZebra.i +++ b/perl/IDZebra.i @@ -569,6 +569,8 @@ const char *data1_get_tabroot(data1_handle dh); * ========================================================================= */ int grs_perl_readf(struct perl_context *context, size_t len); +int grs_perl_readline(struct perl_context *context); +char grs_perl_getc(struct perl_context *context); off_t grs_perl_seekf(struct perl_context *context, off_t offset); off_t grs_perl_tellf(struct perl_context *context); void grs_perl_endf(struct perl_context *context, off_t offset); diff --git a/perl/IDZebra_wrap.c b/perl/IDZebra_wrap.c index fc20149..342b0a1 100644 --- a/perl/IDZebra_wrap.c +++ b/perl/IDZebra_wrap.c @@ -212,7 +212,7 @@ SWIG_TypeClientData(swig_type_info *ti, void *clientdata) { * perl5.swg * * Perl5 runtime library - * $Header: /home/cvsroot/idis/perl/Attic/IDZebra_wrap.c,v 1.5 2003-02-27 23:12:18 pop Exp $ + * $Header: /home/cvsroot/idis/perl/Attic/IDZebra_wrap.c,v 1.6 2003-02-28 16:17:42 pop Exp $ * ----------------------------------------------------------------------------- */ #define SWIGPERL @@ -7831,6 +7831,64 @@ XS(_wrap_grs_perl_readf) { } +XS(_wrap_grs_perl_readline) { + char _swigmsg[SWIG_MAX_ERRMSG] = ""; + const char *_swigerr = _swigmsg; + { + struct perl_context *arg1 ; + int result; + int argvi = 0; + dXSARGS; + + if ((items < 1) || (items > 1)) { + SWIG_croak("Usage: grs_perl_readline(context);"); + } + { + if (SWIG_ConvertPtr(ST(0), (void **) &arg1, SWIGTYPE_p_perl_context,0) < 0) { + SWIG_croak("Type error in argument 1 of grs_perl_readline. Expected _p_perl_context"); + } + } + result = (int)grs_perl_readline(arg1); + + ST(argvi) = sv_newmortal(); + sv_setiv(ST(argvi++), (IV) result); + XSRETURN(argvi); + fail: + (void) _swigerr; + } + croak(_swigerr); +} + + +XS(_wrap_grs_perl_getc) { + char _swigmsg[SWIG_MAX_ERRMSG] = ""; + const char *_swigerr = _swigmsg; + { + struct perl_context *arg1 ; + char result; + int argvi = 0; + dXSARGS; + + if ((items < 1) || (items > 1)) { + SWIG_croak("Usage: grs_perl_getc(context);"); + } + { + if (SWIG_ConvertPtr(ST(0), (void **) &arg1, SWIGTYPE_p_perl_context,0) < 0) { + SWIG_croak("Type error in argument 1 of grs_perl_getc. Expected _p_perl_context"); + } + } + result = (char)grs_perl_getc(arg1); + + ST(argvi) = sv_newmortal(); + sv_setpvn((SV*)ST(argvi++), &result, 1); + XSRETURN(argvi); + fail: + (void) _swigerr; + } + croak(_swigerr); +} + + XS(_wrap_grs_perl_seekf) { char _swigmsg[SWIG_MAX_ERRMSG] = ""; const char *_swigerr = _swigmsg; @@ -8380,6 +8438,8 @@ static swig_command_info swig_commands[] = { {"IDZebrac::data1_get_tabpath", _wrap_data1_get_tabpath}, {"IDZebrac::data1_get_tabroot", _wrap_data1_get_tabroot}, {"IDZebrac::grs_perl_readf", _wrap_grs_perl_readf}, +{"IDZebrac::grs_perl_readline", _wrap_grs_perl_readline}, +{"IDZebrac::grs_perl_getc", _wrap_grs_perl_getc}, {"IDZebrac::grs_perl_seekf", _wrap_grs_perl_seekf}, {"IDZebrac::grs_perl_tellf", _wrap_grs_perl_tellf}, {"IDZebrac::grs_perl_endf", _wrap_grs_perl_endf}, diff --git a/perl/demo/pod.pm b/perl/demo/pod.pm index e20e78d..e4ad0a1 100644 --- a/perl/demo/pod.pm +++ b/perl/demo/pod.pm @@ -9,6 +9,7 @@ package pod; use IDZebra::Filter; use IDZebra::Data1; use Pod::Text; +use Symbol qw(gensym); our @ISA=qw(IDZebra::Filter); 1; @@ -27,19 +28,18 @@ sub process { my $r1=$d1->mk_root('pod'); my $root=$d1->mk_tag($r1,'pod'); - # This is dirty... Pod::Parser doesn't seems to support - # parsing a string, so we have to write the whole thing out into a - # temporary file - open (TMP, ">$tempfile_in"); - print TMP $self->readall(10240); - close (TMP); + # Get the input "file handle" + my $inf = $self->get_fh; - $parser->parse_from_file ($tempfile_in, $tempfile_out); + # Create a funny output "file handle" + my $outf = gensym; + tie (*$outf,'MemFile'); + + $parser->parse_from_filehandle ($inf, $outf); my $section; my $data; - open (TMP, "$tempfile_out"); - while() { + while(<$outf>) { chomp; if (/^([A-Z]+)\s*$/) { my $ss = $1; @@ -59,7 +59,34 @@ sub process { my $tag = $d1->mk_tag($root,$section); $d1->mk_text($tag,$data) if ($data); } - close (TMP); - return ($r1); } + +# ---------------------------------------------------------------------------- +# Package to collect data as an output file from stupid modules, who can only +# write to files... +# ---------------------------------------------------------------------------- +package MemFile; + +sub TIEHANDLE { + my $class = shift; + my $self = {}; + bless ($self,$class); + $self->{buff} = ""; + return ($self); +} + +sub PRINT { + my $self = shift; + for (@_) { + $self->{buff} .= $_; + } +} + +sub READLINE { + my $self = shift; + my $res; + return (undef) unless ($self->{buff}); + ($res,$self->{buff}) = split (/\n/,$self->{buff},2); + return ($res."\n"); +} diff --git a/perl/demo/zebra.cfg b/perl/demo/zebra.cfg index 92d856f..53e3d08 100644 --- a/perl/demo/zebra.cfg +++ b/perl/demo/zebra.cfg @@ -14,7 +14,7 @@ memMax: 1 demo1.recordType.pm: grs.perl.pod -demo1.recordId: (bib1,Title) +demo1.recordId: $database (bib1,Title) demo1.database: demo1 demo2.recordType.pm: grs.perl.pod diff --git a/perl/lib/IDZebra.pm b/perl/lib/IDZebra.pm index 1a5a363..3cd2513 100644 --- a/perl/lib/IDZebra.pm +++ b/perl/lib/IDZebra.pm @@ -163,6 +163,8 @@ sub getScanEntry { *data1_get_tabpath = *IDZebrac::data1_get_tabpath; *data1_get_tabroot = *IDZebrac::data1_get_tabroot; *grs_perl_readf = *IDZebrac::grs_perl_readf; +*grs_perl_readline = *IDZebrac::grs_perl_readline; +*grs_perl_getc = *IDZebrac::grs_perl_getc; *grs_perl_seekf = *IDZebrac::grs_perl_seekf; *grs_perl_tellf = *IDZebrac::grs_perl_tellf; *grs_perl_endf = *IDZebrac::grs_perl_endf; diff --git a/perl/lib/IDZebra/Filter.pm b/perl/lib/IDZebra/Filter.pm index 29e7635..43d4cc8 100644 --- a/perl/lib/IDZebra/Filter.pm +++ b/perl/lib/IDZebra/Filter.pm @@ -9,7 +9,8 @@ package IDZebra::Filter; use IDZebra; use IDZebra::Data1; use IDZebra::Logger qw(:flags :calls); -use Devel::Leak; +use Symbol qw(gensym); +#use Devel::Leak; our $SAFE_MODE = 1; @@ -86,8 +87,6 @@ sub process { sub test { my ($proto, $file, %args) = @_; -# print "Proto:$proto\n"; - my $class = ref($proto) || $proto; my $self = {}; bless ($self, $class); @@ -130,6 +129,30 @@ sub readf { } } +sub readline { + my ($self) = @_; + + my $r = IDZebra::grs_perl_readline($self->{context}); + if ($r > 0) { + my $buff = $self->{_buff}; + $self->{_buff} = undef; + return ($buff); + } + return (undef); +} + +sub getc { + my ($self) = @_; + return(IDZebra::grs_perl_getc($self->{context})); +} + +sub get_fh { + my ($self) = @_; + my $fh = gensym; + tie (*$fh,'IDZebra::FilterFile', $self); + return ($fh); +} + sub readall { my ($self, $buffsize) = @_; my $r; @@ -177,6 +200,53 @@ sub endf { IDZebra::grs_perl_endf($self->{context},$offset); } } +# ---------------------------------------------------------------------------- +# The 'virtual' filehandle for zebra extract calls +# ---------------------------------------------------------------------------- +package IDZebra::FilterFile; +require Tie::Handle; + +our @ISA = qw(Tie::Handle); + +sub TIEHANDLE { + my $class = shift; + my $self = {}; + bless ($self, $class); + $self->{filter} = shift; + return ($self); +} + +sub READ { + my $self = shift; + return ($self->{filter}->readf(@_)); +} + +sub READLINE { + my $self = shift; + return ($self->{filter}->readline()); +} + +sub GETC { + my $self = shift; + return ($self->{filter}->getc()); +} + +sub EOF { + croak ("EOF not implemented"); +} + +sub TELL { + croak ("TELL not implemented"); +} + +sub SEEK { + croak ("SEEK not implemented"); +} + +sub CLOSE { + my $self = shift; +} + __END__ diff --git a/perl/lib/IDZebra/Resultset.pm b/perl/lib/IDZebra/Resultset.pm index 38ace67..dd40ad1 100644 --- a/perl/lib/IDZebra/Resultset.pm +++ b/perl/lib/IDZebra/Resultset.pm @@ -42,6 +42,8 @@ sub DESTROY { # print STDERR "Destroy RS\n"; + # Deleteresultset? + if ($self->{odr_stream}) { IDZebra::odr_reset($self->{odr_stream}); IDZebra::odr_destroy($self->{odr_stream}); diff --git a/perl/lib/IDZebra/Session.pm b/perl/lib/IDZebra/Session.pm index c3bf82e..1a395f0 100644 --- a/perl/lib/IDZebra/Session.pm +++ b/perl/lib/IDZebra/Session.pm @@ -27,6 +27,8 @@ sub new { bless ($self, $class); $self->{cql_ct} = undef; return ($self); + + $self->{databases} = {}; } sub start_service { @@ -70,9 +72,11 @@ sub open { } unless (defined($self->{zh})) { - $self->{zh}=IDZebra::open($self->{zs}) #if ($self->{zs}); + $self->{zh}=IDZebra::open($self->{zs}); } - + + # Reset result set counter + $self->{rscount} = 0; # This is needed in order to somehow initialize the service $self->select_databases("Default"); @@ -124,16 +128,14 @@ sub DESTROY { } } # ----------------------------------------------------------------------------- -# Record group selection +# Record group selection This is a bit nasty... but used at many places # ----------------------------------------------------------------------------- sub group { my ($self,%args) = @_; -# print STDERR "A\n"; if ($#_ > 0) { $self->{rg} = $self->_makeRecordGroup(%args); $self->_selectRecordGroup($self->{rg}); } -# print STDERR "B\n"; return($self->{rg}); } @@ -236,15 +238,42 @@ sub _selectRecordGroup { unless ($dbName = $rg->{databaseName}) { $dbName = 'Default'; } - if (IDZebra::select_database($self->{zh}, $dbName)) { - logf(LOG_FATAL, - "Could not select database %s errCode=%d", - $dbName, - $self->errCode()); - croak("Fatal error selecting record group"); - } else { - logf(LOG_LOG,"Database %s selected",$dbName); + if ($self->select_databases($dbName)) { + croak("Fatal error selecting database $dbName"); + } +} +# ----------------------------------------------------------------------------- +# Selecting databases for search (and also for updating - internally) +# ----------------------------------------------------------------------------- +sub select_databases { + my ($self, @databases) = @_; + + my $changed = 0; + foreach my $db (@databases) { + next if ($self->{databases}{$db}); + $changed++; } + + if ($changed) { + + delete ($self->{databases}); + foreach my $db (@databases) { + $self->{databases}{$db}++; + } + + if (my $res = IDZebra::select_databases($self->{zh}, + ($#databases + 1), + \@databases)) { + logf(LOG_FATAL, + "Could not select database(s) %s errCode=%d", + join(",",@databases), + $self->errCode()); + return ($res); + } else { + logf(LOG_LOG,"Database(s) selected: %s",join(",",@databases)); + } + } + return (0); } # ----------------------------------------------------------------------------- @@ -273,9 +302,6 @@ sub begin_trans { IDZebra::begin_trans($self->{zh}); } - - - sub end_trans { my ($self) = @_; my $stat = IDZebra::ZebraTransactionStatus->new(); @@ -332,7 +358,8 @@ sub compact { sub update { my ($self, %args) = @_; - my $rg = $self->update_args(%args); + my $rg = $self->_update_args(%args); + $self->_selectRecordGroup($rg); $self->begin_trans; IDZebra::repository_update($self->{zh}); $self->_selectRecordGroup($self->{rg}); @@ -341,7 +368,8 @@ sub update { sub delete { my ($self, %args) = @_; - my $rg = $self->update_args(%args); + my $rg = $self->_update_args(%args); + $self->_selectRecordGroup($rg); $self->begin_trans; IDZebra::repository_delete($self->{zh}); $self->_selectRecordGroup($self->{rg}); @@ -350,14 +378,15 @@ sub delete { sub show { my ($self, %args) = @_; - my $rg = $self->update_args(%args); + my $rg = $self->_update_args(%args); + $self->_selectRecordGroup($rg); $self->begin_trans; IDZebra::repository_show($self->{zh}); $self->_selectRecordGroup($self->{rg}); $self->end_trans; } -sub update_args { +sub _update_args { my ($self, %args) = @_; my $rg = $self->_makeRecordGroup(%args); $self->_selectRecordGroup($rg); @@ -371,15 +400,15 @@ sub update_args { sub update_record { my ($self, %args) = @_; return(IDZebra::update_record($self->{zh}, - $self->record_update_args(%args))); + $self->_record_update_args(%args))); } sub delete_record { my ($self, %args) = @_; return(IDZebra::delete_record($self->{zh}, - $self->record_update_args(%args))); + $self->_record_update_args(%args))); } -sub record_update_args { +sub _record_update_args { my ($self, %args) = @_; my $sysno = $args{sysno} ? $args{sysno} : 0; @@ -426,42 +455,20 @@ sub record_update_args { } # ----------------------------------------------------------------------------- -# Search -# ----------------------------------------------------------------------------- -sub select_databases { - my ($self, @databases) = @_; - return (IDZebra::select_databases($self->{zh}, - ($#databases + 1), - \@databases)); -} - -sub search_pqf { - my ($self, $query, $setname) = @_; - my $hits = IDZebra::search_PQF($self->{zh}, - $self->{odr_input}, - $self->{odr_output}, - $query, - $setname); - - my $rs = IDZebra::Resultset->new($self, - name => $setname, - recordCount => $hits, - errCode => $self->errCode, - errString => $self->errString); - return($rs); -} - +# CQL stuff sub cqlmap { my ($self,$mapfile) = @_; if ($#_ > 0) { - unless (-f $mapfile) { - croak("Cannot find $mapfile"); - } - if (defined ($self->{cql_ct})) { - IDZebra::cql_transform_close($self->{cql_ct}); + if ($self->{cql_mapfile} ne $mapfile) { + unless (-f $mapfile) { + croak("Cannot find $mapfile"); + } + if (defined ($self->{cql_ct})) { + IDZebra::cql_transform_close($self->{cql_ct}); + } + $self->{cql_ct} = IDZebra::cql_transform_open_fname($mapfile); + $self->{cql_mapfile} = $mapfile; } - $self->{cql_ct} = IDZebra::cql_transform_open_fname($mapfile); - $self->{cql_mapfile} = $mapfile; } return ($self->{cql_mapfile}); } @@ -473,10 +480,60 @@ sub cql2pqf { } my $res = "\0" x 2048; my $r = IDZebra::cql2pqf($self->{cql_ct}, $cqlquery, $res, 2048); + unless ($r) {return (undef)}; $res=~s/\0.+$//g; return ($res); } + +# ----------------------------------------------------------------------------- +# Search +# ----------------------------------------------------------------------------- +sub search { + my ($self, %args) = @_; + + if ($args{cqlmap}) { $self->cqlmap($args{cqlmap}); } + + my $query; + if ($args{pqf}) { + $query = $args{pqf}; + } + elsif ($args{cql}) { + unless ($query = $self->cql2pqf($args{cql})) { + croak ("Invalid CQL query: '$args{cql}'"); + } + } + unless ($query) { + croak ("No query given to search"); + } + + my $rsname = $args{rsname} ? $args{rsname} : $self->_new_setname; + + return ($self->_search_pqf($query, $rsname)); +} + +sub _new_setname { + my ($self) = @_; + return ("set_".$self->{rscount}++); +} + +sub _search_pqf { + my ($self, $query, $setname) = @_; + + my $hits = IDZebra::search_PQF($self->{zh}, + $self->{odr_input}, + $self->{odr_output}, + $query, + $setname); + + my $rs = IDZebra::Resultset->new($self, + name => $setname, + recordCount => $hits, + errCode => $self->errCode, + errString => $self->errString); + return($rs); +} + sub search_cql { my ($self, $query, $transfile) = @_; } @@ -720,6 +777,8 @@ Don't try this at home! This case, the record identifier string (which is normal B Note, that one record can be updated only once within a transaction - all subsequent updates are skipped. +=head1 SEARCHING + =head1 COPYRIGHT diff --git a/perl/test.pl b/perl/test.pl index 3eb490f..592a316 100755 --- a/perl/test.pl +++ b/perl/test.pl @@ -16,7 +16,7 @@ IDZebra::logFile("test.log"); #IDZebra::logLevel(15); -#IDZebra::init(); +IDZebra::init(); # ---------------------------------------------------------------------------- # Session opening and closing @@ -49,31 +49,38 @@ $sess->init(); # ---------------------------------------------------------------------------- # repository upadte + +our $filecount = 6; + $sess->begin_trans; $sess->update(path => 'lib'); my $stat = $sess->end_trans; -ok(($stat->{inserted} == 6), "Inserted 6 records"); +ok(($stat->{inserted} == $filecount), + "Inserted $stat->{inserted}/$filecount records"); $sess->begin_trans; $sess->update(groupName => 'demo1', path => 'lib'); my $stat = $sess->end_trans; -ok(($stat->{updated} == 6), "Updated 6 records"); +ok(($stat->{inserted} == $filecount), + "Inserted $stat->{updated}/$filecount records"); $sess->begin_trans; $sess->delete(groupName => 'demo1', path => 'lib'); my $stat = $sess->end_trans; -ok(($stat->{deleted} == 6), "Deleted 6 records"); +ok(($stat->{deleted} == $filecount), + "Deleted $stat->{deleted}/$filecount records"); $sess->begin_trans; $sess->update(groupName => 'demo1', path => 'lib'); my $stat = $sess->end_trans; -ok(($stat->{inserted} == 6), "Inserted 6 records"); +ok(($stat->{inserted} == $filecount), + "Inserted $stat->{inserted}/$filecount records"); ok(($sess->group->{databaseName} eq "demo2"),"Original group is selected"); @@ -83,10 +90,22 @@ my $rec1=`cat lib/IDZebra/Data1.pm`; my $rec2=`cat lib/IDZebra/Filter.pm`; $sess->begin_trans; -my $s1=$sess->update_record(data => $rec1, +my $s1=$sess->update_record(data => $rec2, recordType => 'grs.perl.pod', groupName => "demo1", ); +my $stat = $sess->end_trans; +ok(($stat->{updated} == 1), "Updated 1 records"); + +#exit; +# ---------------------------------------------------------------------------- +# search +$sess->select_databases('demo2'); +$sess->begin_read; +my $rs1 = $sess->search(cqlmap => 'demo/cql.map', + cql => 'IDZebra'); + +print STDERR "$rs1->{recordCount} hits.\n"; #my $s2=$sess->update_record(data => $rec2); # recordType => "grs.perl.pod"); @@ -96,9 +115,6 @@ my $s1=$sess->update_record(data => $rec1, -my $stat = $sess->end_trans; -ok(($stat->{updated} == 1), "Updated 1 records"); - #$sess->cqlmap("cql.map"); #print STDERR $sess->cql2pqf("job.id <= 5"); #print STDERR $sess->cql2pqf("job.id=5 and dc.title=computer"); diff --git a/perl/zebra_api_ext.c b/perl/zebra_api_ext.c index b217441..94f65ed 100644 --- a/perl/zebra_api_ext.c +++ b/perl/zebra_api_ext.c @@ -244,8 +244,6 @@ int zebra_cql2pqf (cql_transform_t ct, return (status); } - logf (LOG_LOG,"PQF:%s",res); - return (0); }