1 # $Id: Session.pm,v 1.6 2003-02-28 18:45:50 pop Exp $
3 # Zebra perl API header
4 # =============================================================================
5 package IDZebra::Session;
12 use IDZebra::Logger qw(:flags :calls);
13 use IDZebra::Resultset;
16 our $VERSION = do { my @r = (q$Revision: 1.6 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
17 our @ISA = qw(IDZebra::Logger);
21 # -----------------------------------------------------------------------------
22 # Class constructors, destructor
23 # -----------------------------------------------------------------------------
25 my ($proto, %args) = @_;
26 my $class = ref($proto) || $proto;
28 $self->{args} = \%args;
30 bless ($self, $class);
31 $self->{cql_ct} = undef;
34 $self->{databases} = {};
38 my ($self, %args) = @_;
41 unless (defined($self->{zs})) {
42 if (defined($args{'configFile'})) {
43 $self->{zs} = IDZebra::start($args{'configFile'});
45 $self->{zs} = IDZebra::start("zebra.cfg");
52 if (defined($self->{zs})) {
53 IDZebra::stop($self->{zs}) if ($self->{zs});
60 my ($proto,%args) = @_;
63 if (ref($proto)) { $self = $proto; } else {
64 $self = $proto->new(%args);
68 %args = %{$self->{args}};
71 $self->start_service(%args);
73 unless (defined($self->{zs})) {
74 croak ("Falied to open zebra service");
77 unless (defined($self->{zh})) {
78 $self->{zh}=IDZebra::open($self->{zs});
81 # Reset result set counter
84 # This is needed in order to somehow initialize the service
85 $self->databases("Default");
87 # Load the default configuration
90 $self->{odr_input} = IDZebra::odr_createmem($IDZebra::ODR_DECODE);
91 $self->{odr_output} = IDZebra::odr_createmem($IDZebra::ODR_ENCODE);
100 while (IDZebra::trans_no($self->{zh}) > 0) {
101 logf (LOG_WARN,"Explicitly closing transaction with session");
105 IDZebra::close($self->{zh});
109 if ($self->{odr_input}) {
110 IDZebra::odr_reset($self->{odr_input});
111 IDZebra::odr_destroy($self->{odr_input});
112 $self->{odr_input} = undef;
115 if ($self->{odr_output}) {
116 IDZebra::odr_reset($self->{odr_output});
117 IDZebra::odr_destroy($self->{odr_output});
118 $self->{odr_output} = undef;
126 logf (LOG_LOG,"DESTROY $self");
129 if (defined ($self->{cql_ct})) {
130 IDZebra::cql_transform_close($self->{cql_ct});
133 # -----------------------------------------------------------------------------
134 # Record group selection. This is a bit nasty... but used in many places
135 # -----------------------------------------------------------------------------
137 my ($self,%args) = @_;
139 $self->{rg} = $self->_makeRecordGroup(%args);
140 $self->_selectRecordGroup($self->{rg});
# Make the named record group the session's current one.
#   $groupName - name passed on to _getRecordGroup.
# The group returned by _getRecordGroup is stored in $self->{rg} and then
# applied to the session through _selectRecordGroup.
145 sub selectRecordGroup {
146 my ($self, $groupName) = @_;
147 $self->{rg} = $self->_getRecordGroup($groupName);
148 $self->_selectRecordGroup($self->{rg});
# Debugging aid: dump record group $rg to STDERR.
# Prints a "-----" separator line, then one "key:value" line for each field
# name in the list iterated below.
151 sub _displayRecordGroup {
152 my ($self, $rg) = @_;
153 print STDERR "-----\n";
# NOTE(review): using a bare qw() list as the foreach parentheses was
# deprecated in Perl 5.14 and is a syntax error from Perl 5.18 on. Confirm the
# supported Perl version, or wrap the list: foreach my $key (qw(...)).
154 foreach my $key qw (groupName
165 print STDERR "$key:",$rg->{$key},"\n";
# Create a copy of record group $orig.
# A new IDZebra::recordGroup is created and passed to
# IDZebra::init_recordGroup, then the fields named in the list below are
# copied from $orig. Only fields whose value in $orig is true are copied, so
# fields holding 0, "" or undef are not transferred.
169 sub _cloneRecordGroup {
170 my ($self, $orig) = @_;
171 my $rg = IDZebra::recordGroup->new();
172 my $r = IDZebra::init_recordGroup($rg);
# NOTE(review): using a bare qw() list as the foreach parentheses was
# deprecated in Perl 5.14 and is a syntax error from Perl 5.18 on. Confirm the
# supported Perl version, or wrap the list: foreach my $key (qw(...)).
173 foreach my $key qw (groupName
185 $rg->{$key} = $orig->{$key} if ($orig->{$key});
# Build a record group object for a group name and optional file extension.
#   $groupName - stored in the new group's groupName field when non-empty.
#   $ext       - file extension; replaced by "" when false.
# A new IDZebra::recordGroup is created, passed to IDZebra::init_recordGroup,
# and then handed to IDZebra::res_get_recordGroup together with the session
# handle ($self->{zh}) and $ext.
# Presumably res_get_recordGroup fills in the group's settings from the
# loaded configuration - verify against the IDZebra bindings.
190 sub _getRecordGroup {
191 my ($self, $groupName, $ext) = @_;
192 my $rg = IDZebra::recordGroup->new();
193 my $r = IDZebra::init_recordGroup($rg);
# NOTE(review): "ne" emits an "uninitialized value" warning under
# "use warnings" when $groupName is undef - confirm callers always pass a string.
194 $rg->{groupName} = $groupName if ($groupName ne "");
195 $ext = "" unless ($ext);
# NOTE(review): $r was already declared with "my" above; this second
# declaration masks the first and triggers a "masks earlier declaration"
# warning under "use warnings".
196 my $r = IDZebra::res_get_recordGroup($self->{zh}, $rg, $ext);
# Work out the record group to use for a call from its named options (%args).
# When no options are passed at all, the session's current group
# ($self->{rg}) is returned unchanged.
# Otherwise a group is obtained and the options are applied to it with
# _setRecordGroupOptions.
200 sub _makeRecordGroup {
201 my ($self, %args) = @_;
204 my @keys = keys(%args);
# $#keys is the last index of @keys, so this test is true when %args is empty.
205 unless ($#keys >= 0) {
206 return ($self->{rg});
# A true groupName option selects the group via _getRecordGroup; the
# _cloneRecordGroup assignment is presumably the alternative branch
# (copy of the current group) - TODO confirm.
209 if ($args{groupName}) {
210 $rg = $self->_getRecordGroup($args{groupName});
212 $rg = $self->_cloneRecordGroup($self->{rg});
214 $self->_setRecordGroupOptions($rg, %args);
# Copy caller-supplied options onto record group $rg.
#   $rg   - record group object to modify in place.
#   %args - named options; only keys in the list iterated below are considered.
# An option is copied whenever it is defined, so 0 and "" are accepted
# as values.
218 sub _setRecordGroupOptions {
219 my ($self, $rg, %args) = @_;
# NOTE(review): using a bare qw() list as the foreach parentheses was
# deprecated in Perl 5.14 and is a syntax error from Perl 5.18 on. Confirm the
# supported Perl version, or wrap the list: foreach my $key (qw(...)).
221 foreach my $key qw (databaseName
232 if (defined ($args{$key})) {
233 $rg->{$key} = $args{$key};
# Apply record group $rg to the session.
# The group is passed to IDZebra::set_group on the session handle, then the
# database name is selected through $self->databases.
# Croaks with "Fatal error selecting database ..." when $self->databases
# returns a false value for that name.
237 sub _selectRecordGroup {
238 my ($self, $rg) = @_;
239 my $r = IDZebra::set_group($self->{zh}, $rg);
# The "=" is an intentional assignment: $dbName receives the group's
# databaseName, and the unless body runs when that value is false.
241 unless ($dbName = $rg->{databaseName}) {
244 unless ($self->databases($dbName)) {
245 croak("Fatal error selecting database $dbName");
248 # -----------------------------------------------------------------------------
249 # Selecting databases for search (and also for updating - internally)
250 # -----------------------------------------------------------------------------
252 my ($self, @databases) = @_;
255 return (keys(%{$self->{databases}}));
261 foreach my $db (@databases) {
262 next if ($self->{databases}{$db});
267 foreach my $db (keys (%{$self->{databases}})) {
268 $changed++ unless ($tmp{$db});
273 delete ($self->{databases});
274 foreach my $db (@databases) {
275 $self->{databases}{$db}++;
278 if (IDZebra::select_databases($self->{zh},
282 "Could not select database(s) %s errCode=%d",
283 join(",",@databases),
287 logf(LOG_LOG,"Database(s) selected: %s",join(",",@databases));
290 return (keys(%{$self->{databases}}));
293 # -----------------------------------------------------------------------------
295 # -----------------------------------------------------------------------------
298 return(IDZebra::errCode($self->{zh}));
303 return(IDZebra::errString($self->{zh}));
308 return(IDZebra::errAdd($self->{zh}));
311 # -----------------------------------------------------------------------------
313 # -----------------------------------------------------------------------------
316 IDZebra::begin_trans($self->{zh});
321 my $stat = IDZebra::ZebraTransactionStatus->new();
322 IDZebra::end_trans($self->{zh}, $stat);
328 return(IDZebra::begin_read($self->{zh}));
333 IDZebra::end_read($self->{zh});
337 my ($self, $value) = @_;
338 if ($#_ > 0) { IDZebra::set_shadow_enable($self->{zh},$value); }
339 return (IDZebra::get_shadow_enable($self->{zh}));
344 if ($self->shadow_enable) {
345 return(IDZebra::commit($self->{zh}));
349 # -----------------------------------------------------------------------------
350 # We don't really need that...
351 # -----------------------------------------------------------------------------
353 my ($self, $name) = @_;
354 if ($name !~/^(input|output)$/) {
355 croak("Undefined ODR '$name'");
357 IDZebra::odr_reset($self->{"odr_$name"});
360 # -----------------------------------------------------------------------------
362 # -----------------------------------------------------------------------------
365 return(IDZebra::init($self->{zh}));
370 return(IDZebra::compact($self->{zh}));
374 my ($self, %args) = @_;
375 my $rg = $self->_update_args(%args);
376 $self->_selectRecordGroup($rg);
378 IDZebra::repository_update($self->{zh});
379 $self->_selectRecordGroup($self->{rg});
384 my ($self, %args) = @_;
385 my $rg = $self->_update_args(%args);
386 $self->_selectRecordGroup($rg);
388 IDZebra::repository_delete($self->{zh});
389 $self->_selectRecordGroup($self->{rg});
394 my ($self, %args) = @_;
395 my $rg = $self->_update_args(%args);
396 $self->_selectRecordGroup($rg);
398 IDZebra::repository_show($self->{zh});
399 $self->_selectRecordGroup($self->{rg});
404 my ($self, %args) = @_;
405 my $rg = $self->_makeRecordGroup(%args);
406 $self->_selectRecordGroup($rg);
410 # -----------------------------------------------------------------------------
412 # -----------------------------------------------------------------------------
415 my ($self, %args) = @_;
416 return(IDZebra::update_record($self->{zh},
417 $self->_record_update_args(%args)));
421 my ($self, %args) = @_;
422 return(IDZebra::delete_record($self->{zh},
423 $self->_record_update_args(%args)));
# Turn the named arguments of a single-record update or delete into a
# positional list.
# Options read here: sysno, match, recordType, file and data. These five keys
# are removed from %args, and the remaining options are passed to
# _makeRecordGroup.
# Returns the list ($rg, $rectype, $sysno, $match, $fname, $buff, $len),
# where $len is length($buff).
425 sub _record_update_args {
426 my ($self, %args) = @_;
# Defaults for options that are missing or false: sysno 0, match "",
# recordType "", file name "<no file>".
428 my $sysno = $args{sysno} ? $args{sysno} : 0;
429 my $match = $args{match} ? $args{match} : "";
430 my $rectype = $args{recordType} ? $args{recordType} : "";
431 my $fname = $args{file} ? $args{file} : "<no file>";
438 elsif ($args{file}) {
# NOTE(review): two-argument open with a bareword handle; the file name is
# interpreted for mode characters. A failed open only warns, and the read
# below still runs on F.
439 open (F, $args{file}) || warn ("Cannot open $args{file}");
# Reads the whole of F and concatenates the lines into one string.
440 $buff = join('',(<F>));
443 my $len = length($buff);
# Drop the per-record options so only record-group options are left in %args.
445 delete ($args{sysno});
446 delete ($args{match});
447 delete ($args{recordType});
448 delete ($args{file});
449 delete ($args{data});
451 my $rg = $self->_makeRecordGroup(%args);
453 # If no record type is given, then try to find it out from the
# The extension is the run of word characters after the last "." in $fname;
# the record type is taken from the group _getRecordGroup returns for it.
456 if (my ($ext) = $fname =~ /\.(\w+)$/) {
457 my $rg2 = $self->_getRecordGroup($rg->{groupName},$ext);
458 $rectype = $rg2->{recordType};
# Fall back to the database named "Default" when the group names none.
462 $rg->{databaseName} = "Default" unless ($rg->{databaseName});
464 # print STDERR "$rectype,$sysno,$match,$fname,$len\n";
468 return ($rg, $rectype, $sysno, $match, $fname, $buff, $len);
471 # -----------------------------------------------------------------------------
474 my ($self,$mapfile) = @_;
476 if ($self->{cql_mapfile} ne $mapfile) {
477 unless (-f $mapfile) {
478 croak("Cannot find $mapfile");
480 if (defined ($self->{cql_ct})) {
481 IDZebra::cql_transform_close($self->{cql_ct});
483 $self->{cql_ct} = IDZebra::cql_transform_open_fname($mapfile);
484 $self->{cql_mapfile} = $mapfile;
487 return ($self->{cql_mapfile});
491 my ($self, $cqlquery) = @_;
492 unless (defined($self->{cql_ct})) {
493 croak("CQL map file is not specified yet.");
495 my $res = "\0" x 2048;
496 my $r = IDZebra::cql2pqf($self->{cql_ct}, $cqlquery, $res, 2048);
497 unless ($r) {return (undef)};
503 # -----------------------------------------------------------------------------
505 # -----------------------------------------------------------------------------
507 my ($self, %args) = @_;
509 if ($args{cqlmap}) { $self->cqlmap($args{cqlmap}); }
516 unless ($query = $self->cql2pqf($args{cql})) {
517 croak ("Invalid CQL query: '$args{cql}'");
521 croak ("No query given to search");
526 if ($args{databases}) {
527 @origdbs = $self->databases;
528 $self->databases(@{$args{databases}});
531 my $rsname = $args{rsname} ? $args{rsname} : $self->_new_setname;
533 my $rs = $self->_search_pqf($query, $rsname);
535 if ($args{databases}) {
536 $self->databases(@origdbs);
544 return ("set_".$self->{rscount}++);
548 my ($self, $query, $setname) = @_;
550 my $hits = IDZebra::search_PQF($self->{zh},
556 my $rs = IDZebra::Resultset->new($self,
558 recordCount => $hits,
559 errCode => $self->errCode,
560 errString => $self->errString);
564 # -----------------------------------------------------------------------------
567 # Sorting of multiple result sets is not supported by zebra...
568 # -----------------------------------------------------------------------------
571 my ($self, $sortspec, $setname, @sets) = @_;
575 foreach my $rs (@sets) {
576 push (@setnames, $rs->{name});
577 $count += $rs->{recordCount}; # is this really sure ??? It doesn't
581 my $status = IDZebra::sort($self->{zh},
587 my $errCode = $self->errCode;
588 my $errString = $self->errString;
590 if ($status || $errCode) {$count = 0;}
592 my $rs = IDZebra::Resultset->new($self,
594 recordCount => $count,
596 errString => $errString);
601 # ============================================================================
608 IDZebra::Session - A Zebra database server session for update and retrieval
612 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
615 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
616 groupName => 'demo1');
618 $sess->group(groupName => 'demo2');
624 $sess->update(path => 'lib');
626 my $s1=$sess->update_record(data => $rec1,
627 recordType => 'grs.perl.pod',
628 groupName => "demo1",
631 my $stat = $sess->end_trans;
633 $sess->databases('demo1','demo2');
635 my $rs1 = $sess->search(cqlmap => 'demo/cql.map',
636 cql => 'dc.title=IDZebra',
637 databases => [qw(demo1 demo2)]);
642 Zebra is a high-performance, general-purpose structured text indexing and retrieval engine. It reads structured records in a variety of input formats (eg. email, XML, MARC) and allows access to them through exact boolean search expressions and relevance-ranked free-text queries.
644 Zebra supports large databases (more than ten gigabytes of data, tens of millions of records). It supports incremental, safe database updates on live systems. You can access data stored in Zebra using a variety of Index Data tools (eg. YAZ and PHP/YAZ) as well as commercial and freeware Z39.50 clients and toolkits.
646 =head1 OPENING AND CLOSING ZEBRA SESSIONS
648 For the time being, only local database services are supported, in the same way as when calling zebraidx or zebrasrv from the command shell. To open a local Zebra database with a specific configuration file, use
650 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
655 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
657 where $sess is the object representing a Zebra session. Whenever this variable goes out of scope, the session is closed, together with all active transactions, etc. If you'd like to close the session explicitly, just say:
662 - close all transactions
663 - destroy all result sets
666 In the future different database access methods are going to be available,
669 $sess = IDZebra::Session->open(server => 'ostrich.technomat.hu:9999');
671 You can also use the B<record group> arguments described below directly when calling the constructor, or the open method:
673 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
674 groupName => 'demo');
679 If you manage different sets of records that share common characteristics, you can organize the configuration settings for each type into "groups". See the Zebra manual on the configuration file (zebra.cfg).
681 For each open session a default record group is assigned. You can configure it in the constructor, or by the B<group> method:
683 $sess->group(groupName => ..., ...)
685 The following options are available:
691 This will select the named record group, and load the corresponding settings from the configuration file. All subsequent values will overwrite those...
693 =item B<databaseName>
695 The name of the (logical) database the updated records will belong to.
699 This path is used for directory updates (B<update>, B<delete> methods);
703 This option determines how to identify your records. See I<Zebra manual: Locating Records>
707 The record type used for indexing.
709 =item B<flagStoreData>
711 Specifies whether the records should be stored internally in the Zebra system files. If you want to maintain the raw records yourself, this option should be false (0). If you want Zebra to take care of the records for you, it should be true(1).
713 =item B<flagStoreKeys>
715 Specifies whether key information should be saved for a given group of records. If you plan to update/delete this type of records later this should be specified as 1; otherwise it should be 0 (default), to save register space.
721 =item B<fileVerboseLimit>
723 Skip log messages, when doing a directory update, and the specified number of files are processed...
725 =item B<databaseNamePath>
729 =item B<explainDatabase>
731 The name of the explain database to be used
735 Follow links when doing directory update.
739 You can use the same parameters calling all update methods.
741 =head1 TRANSACTIONS (WRITE LOCKS)
743 A transaction is a block of record update (insert / modify / delete) procedures. Each call to such a function implicitly starts a transaction, unless one has already been started by
747 For multiple per record updates it's efficient to start transactions explicitly: otherwise registers (system files, vocabularies, etc..) are updated one by one. After finishing all requested updates, use
749 $stat = $sess->end_trans;
751 The return value is a ZebraTransactionStatus object, containing the following members as a hash reference:
753 $stat->{processed} # Number of records processed
754 $stat->{updated} # Number of records updated
755 $stat->{deleted} # Number of records deleted
756 $stat->{inserted} # Number of records inserted
757 $stat->{stime} # System time used
758 $stat->{utime} # User time used
762 There are two ways to update data in a Zebra database using the perl API. You can update an entire directory structure just the way it's done by zebraidx:
764 $sess->update(path => 'lib');
766 This will update the database with the files in directory "lib", according to the current record group settings.
770 This will update the database with the files, specified by the default record group setting. I<path> has to be specified there...
772 $sess->update(groupName => 'demo1',
775 Update the database with files in "lib" according to the settings of group "demo1"
777 $sess->delete(groupName => 'demo1',
780 Delete the records derived from the files in directory "lib", according to the "demo1" group settings. Sounds complex? Read zebra documentation about identifying records.
782 You can also update records one by one, even directly from the memory:
784 $sysno = $sess->update_record(data => $rec1,
785 recordType => 'grs.perl.pod',
786 groupName => "demo1");
788 This will update the database with the given record buffer. Note that in this case recordType is explicitly specified, as no filename is given and no default record type is specified for the demo1 group. The return value is the system-assigned ID of the record.
790 You can also index a single file:
792 $sysno = $sess->update_record(file => "lib/IDZebra/Data1.pm");
794 Or, provide a buffer and a filename (where the filename will only be used to identify the record, if configured that way, and possibly to find out its record type):
796 $sysno = $sess->update_record(data => $rec1,
797 file => "lib/IDZebra/Data1.pm");
799 And some crazy stuff:
801 $sysno = $sess->delete_record(sysno => $sysno);
803 where sysno in itself is sufficient to identify the record
805 $sysno = $sess->delete_record(data => $rec1,
806 recordType => 'grs.perl.pod',
807 groupName => "demo1");
809 In this case the record is extracted and, if it already exists, located in the database and then deleted.
811 $sysno = $sess->delete_record(data => $rec1,
813 recordType => 'grs.perl.pod',
814 groupName => "demo1");
816 Don't try this at home! In this case, the record identifier string (which is normally generated according to the rules set in the recordId directive of zebra.cfg) is provided directly.
819 B<Important:> Note that a record can be updated only once within a transaction - all subsequent updates are skipped.
821 =head1 DATABASE SELECTION
823 Within a zebra repository you can define logical databases. You can either do this by record groups, or by providing the databaseName argument for update methods. For each record the database name it belongs to is stored.
825 For searching, you can select databases by calling:
827 $sess->databases('db1','db2');
829 This does nothing if exactly the given databases are already selected. You can get the list of the currently selected databases by calling:
831 @dblist = $sess->databases();
835 It's nice to be able to store data in your repository... But it's useful to reach it as well. So this is how to do searching:
837 $rs = $sess->search(databases => [qw(demo1 demo2)], # optional
838 pqf => '@attr 1=4 computer');
840 This is going to execute a search in databases demo1 and demo2, for title 'computer'. This is a PQF (Prefix Query Format) search; see the YAZ documentation for details. The database selection is optional: if it's provided, the given list of databases is selected for this particular search, then the original selection is restored.
844 Not all users enjoy typing in prefix query structures and numerical attribute values, even in a minimalistic test client. In the library world, the more intuitive Common Command Language (or ISO 8777) has enjoyed some popularity - especially before the widespread availability of graphical interfaces. It is still useful in applications where you for some reason or other need to provide a symbolic language for expressing boolean query structures.
846 The CCL searching is not currently supported by this API.
850 CQL - Common Query Language - was defined for the SRW protocol. In many ways CQL has a similar syntax to CCL. The objective of CQL is different. Where CCL aims to be an end-user language, CQL is the protocol query language for SRW.
852 In order to map CQL queries to Zebra internal search structures, you have to define a mapping, the way it is described in YAZ documentation: I<Specification of CQL to RPN mapping>. The mapping is interpreted by the method:
854 $sess->cqlmap($mapfile);
856 Or, you can directly provide the I<mapfile> parameter for the search:
858 my $rs1 = $sess->search(cqlmap => 'demo/cql.map',
859 cql => 'dc.title=IDZebra');
861 As you see, CQL searching is so simple: just give the query in the I<cql> parameter.
865 As you have seen, the result of the search request is a I<Resultset> object.
866 It contains the number of hits and the search status, and can be used to sort and retrieve the resulting records.
870 printf ("RS Status is %d (%s)\n", $rs->errCode, $rs->errString);
872 I<$rs-E<gt>errCode> is 0, if there were no errors during search. Read the I<IDZebra::Resultset> manpage for more details.
874 =head1 MISC FUNCTIONS
882 Peter Popovics, pop@technomat.hu
886 IDZebra, IDZebra::Data1, Zebra documentation