perl/lib/IDZebra/Session.pm

   1 #!/usr/bin/perl
   2 # ============================================================================
   3 # Zebra perl API header
   4 # =============================================================================
   5 use strict;
   6 # ============================================================================
   7 package IDZebra::Session;
   8 use IDZebra;
   9 use IDZebra::Logger qw(:flags :calls);
  10 #use IDZebra::Repository;
  11 use IDZebra::Resultset;
  12 use Scalar::Util;
  13 use Carp;
  14 use strict;
  15 our @ISA = qw(IDZebra::Logger);
  16
  17 1;
  18 # -----------------------------------------------------------------------------
  19 # Class constructors, destructor
  20 # -----------------------------------------------------------------------------
  21 sub new {
  22     my ($proto, %args) = @_;
  23     my $class = ref($proto) || $proto;
  24     my $self = {};
  25     $self->{args} = \%args;
  26
  27     bless ($self, $class);
  28     $self->{cql_ct} = undef;
  29     return ($self);
  30 }
  31
  32 sub start_service {
  33     my ($self, %args) = @_;
  34
  35     my $zs;
  36     unless (defined($self->{zs})) {
  37         if (defined($args{'configFile'})) {
  38             $self->{zs} = IDZebra::start($args{'configFile'});
  39         } else {
  40             $self->{zs} = IDZebra::start("zebra.cfg");
  41         }
  42     }
  43 }
  44
  45 sub stop_service {
  46     my ($self) = @_;
  47     if (defined($self->{zs})) {
  48         IDZebra::stop($self->{zs}) if ($self->{zs});
  49         $self->{zs} = undef;
  50     }
  51 }
  52
  53
  54 sub open {
  55     my ($proto,%args) = @_;
  56     my $self = {};
  57
  58     if (ref($proto)) { $self = $proto; } else {
  59         $self = $proto->new(%args);
  60     }
  61
  62     unless (%args) {
  63         %args = %{$self->{args}};
  64     }
  65
  66     $self->start_service(%args);
  67
  68     unless (defined($self->{zs})) {
  69         croak ("Falied to open zebra service");
  70     }
  71
  72     unless (defined($self->{zh})) {
  73         $self->{zh}=IDZebra::open($self->{zs}) #if ($self->{zs});
  74     }
  75
  76
  77     # This is needed in order to somehow initialize the service
  78     $self->select_databases("Default");
  79
  80     # Load the default configuration
  81     $self->group(%args);
  82
  83     $self->{odr_input} = IDZebra::odr_createmem($IDZebra::ODR_DECODE);
  84     $self->{odr_output} = IDZebra::odr_createmem($IDZebra::ODR_ENCODE);
  85
  86     return ($self);
  87 }
  88
  89 sub close {
  90     my ($self) = @_;
  91
  92     if ($self->{zh}) {
  93         while (IDZebra::trans_no($self->{zh}) > 0) {
  94             logf (LOG_WARN,"Explicitly closing transaction with session");
  95             $self->end_trans;
  96         }
  97
  98         IDZebra::close($self->{zh});
  99         $self->{zh} = undef;
 100     }
 101
 102     if ($self->{odr_input}) {
 103         IDZebra::odr_reset($self->{odr_input});
 104         IDZebra::odr_destroy($self->{odr_input});
 105         $self->{odr_input} = undef;
 106     }
 107
 108     if ($self->{odr_output}) {
 109         IDZebra::odr_reset($self->{odr_output});
 110         IDZebra::odr_destroy($self->{odr_output});
 111         $self->{odr_output} = undef;
 112     }
 113
 114     $self->stop_service;
 115 }
 116
 117 sub DESTROY {
 118     my ($self) = @_;
 119     logf (LOG_LOG,"DESTROY $self");
 120     $self->close;
 121
 122     if (defined ($self->{cql_ct})) {
 123       IDZebra::cql_transform_close($self->{cql_ct});
 124     }
 125 }
 126 # -----------------------------------------------------------------------------
 127 # Record group selection
 128 # -----------------------------------------------------------------------------
 129 sub group {
 130     my ($self,%args) = @_;
 131 #    print STDERR "A\n";
 132     if ($#_ > 0) {
 133         $self->{rg} = $self->_makeRecordGroup(%args);
 134         $self->_selectRecordGroup($self->{rg});
 135     }
 136 #    print STDERR "B\n";
 137     return($self->{rg});
 138 }
 139
 140 sub selectRecordGroup {
 141     my ($self, $groupName) = @_;
 142     $self->{rg} = $self->_getRecordGroup($groupName);
 143     $self->_selectRecordGroup($self->{rg});
 144 }
 145
 146 sub _displayRecordGroup {
 147     my ($self, $rg) = @_;
 148     print STDERR "-----\n";
 149     foreach my $key qw (groupName
 150                         databaseName
 151                         path recordId
 152                         recordType
 153                         flagStoreData
 154                         flagStoreKeys
 155                         flagRw
 156                         fileVerboseLimit
 157                         databaseNamePath
 158                         explainDatabase
 159                         followLinks) {
 160         print STDERR "$key:",$rg->{$key},"\n";
 161     }
 162 }
 163
 164 sub _cloneRecordGroup {
 165     my ($self, $orig) = @_;
 166     my $rg = IDZebra::recordGroup->new();
 167     my $r = IDZebra::init_recordGroup($rg);
 168     foreach my $key qw (groupName
 169                         databaseName
 170                         path
 171                         recordId
 172                         recordType
 173                         flagStoreData
 174                         flagStoreKeys
 175                         flagRw
 176                         fileVerboseLimit
 177                         databaseNamePath
 178                         explainDatabase
 179                         followLinks) {
 180         $rg->{$key} = $orig->{$key} if ($orig->{$key});
 181     }
 182     return ($rg);
 183 }
 184
 185 sub _getRecordGroup {
 186     my ($self, $groupName, $ext) = @_;
 187     my $rg = IDZebra::recordGroup->new();
 188     my $r = IDZebra::init_recordGroup($rg);
 189     $rg->{groupName} = $groupName if ($groupName ne "");
 190     $ext = "" unless ($ext);
 191     my $r = IDZebra::res_get_recordGroup($self->{zh}, $rg, $ext);
 192     return ($rg);
 193 }
 194
 195 sub _makeRecordGroup {
 196     my ($self, %args) = @_;
 197     my $rg;
 198
 199     my @keys = keys(%args);
 200     unless ($#keys >= 0) {
 201         return ($self->{rg});
 202     }
 203
 204     if ($args{groupName}) {
 205         $rg = $self->_getRecordGroup($args{groupName});
 206     } else {
 207         $rg = $self->_cloneRecordGroup($self->{rg});
 208     }
 209     $self->_setRecordGroupOptions($rg, %args);
 210     return ($rg);
 211 }
 212
 213 sub _setRecordGroupOptions {
 214     my ($self, $rg, %args) = @_;
 215
 216     foreach my $key qw (databaseName
 217                         path
 218                         recordId
 219                         recordType
 220                         flagStoreData
 221                         flagStoreKeys
 222                         flagRw
 223                         fileVerboseLimit
 224                         databaseNamePath
 225                         explainDatabase
 226                         followLinks) {
 227         if (defined ($args{$key})) {
 228             $rg->{$key} = $args{$key};
 229         }
 230     }
 231 }
 232 sub _selectRecordGroup {
 233     my ($self, $rg) = @_;
 234     my $r = IDZebra::set_group($self->{zh}, $rg);
 235     my $dbName;
 236     unless ($dbName = $rg->{databaseName}) {
 237         $dbName = 'Default';
 238     }
 239     if (IDZebra::select_database($self->{zh}, $dbName)) {
 240         logf(LOG_FATAL,
 241              "Could not select database %s errCode=%d",
 242              $dbName,
 243              $self->errCode());
 244         croak("Fatal error selecting record group");
 245     } else {
 246         logf(LOG_LOG,"Database %s selected",$dbName);
 247     }
 248 }
 249
 250 # -----------------------------------------------------------------------------
 251 # Error handling
 252 # -----------------------------------------------------------------------------
 253 sub errCode {
 254     my ($self) = @_;
 255     return(IDZebra::errCode($self->{zh}));
 256 }
 257
 258 sub errString {
 259     my ($self) = @_;
 260     return(IDZebra::errString($self->{zh}));
 261 }
 262
 263 sub errAdd {
 264     my ($self) = @_;
 265     return(IDZebra::errAdd($self->{zh}));
 266 }
 267
 268 # -----------------------------------------------------------------------------
 269 # Transaction stuff
 270 # -----------------------------------------------------------------------------
 271 sub begin_trans {
 272     my ($self) = @_;
 273     IDZebra::begin_trans($self->{zh});
 274 }
 275
 276
 277
 278
 279 sub end_trans {
 280     my ($self) = @_;
 281     my $stat = IDZebra::ZebraTransactionStatus->new();
 282     IDZebra::end_trans($self->{zh}, $stat);
 283     return ($stat);
 284 }
 285
 286 sub begin_read {
 287     my ($self) =@_;
 288     return(IDZebra::begin_read($self->{zh}));
 289 }
 290
 291 sub end_read {
 292     my ($self) =@_;
 293     IDZebra::end_read($self->{zh});
 294 }
 295
 296 sub shadow_enable {
 297     my ($self, $value) = @_;
 298     if ($#_ > 0) { IDZebra::set_shadow_enable($self->{zh},$value); }
 299     return (IDZebra::get_shadow_enable($self->{zh}));
 300 }
 301
 302 sub commit {
 303     my ($self) = @_;
 304     if ($self->shadow_enable) {
 305         return(IDZebra::commit($self->{zh}));
 306     }
 307 }
 308
 309 # -----------------------------------------------------------------------------
 310 # We don't really need that...
 311 # -----------------------------------------------------------------------------
 312 sub odr_reset {
 313     my ($self, $name) = @_;
 314     if ($name !~/^(input|output)$/) {
 315         croak("Undefined ODR '$name'");
 316     }
 317   IDZebra::odr_reset($self->{"odr_$name"});
 318 }
 319
 320 # -----------------------------------------------------------------------------
 321 # Init/compact
 322 # -----------------------------------------------------------------------------
 323 sub init {
 324     my ($self) = @_;
 325     return(IDZebra::init($self->{zh}));
 326 }
 327
 328 sub compact {
 329     my ($self) = @_;
 330     return(IDZebra::compact($self->{zh}));
 331 }
 332
 333 sub update {
 334     my ($self, %args) = @_;
 335     my $rg = $self->update_args(%args);
 336     $self->begin_trans;
 337     IDZebra::repository_update($self->{zh});
 338     $self->_selectRecordGroup($self->{rg});
 339     $self->end_trans;
 340 }
 341
 342 sub delete {
 343     my ($self, %args) = @_;
 344     my $rg = $self->update_args(%args);
 345     $self->begin_trans;
 346     IDZebra::repository_delete($self->{zh});
 347     $self->_selectRecordGroup($self->{rg});
 348     $self->end_trans;
 349 }
 350
 351 sub show {
 352     my ($self, %args) = @_;
 353     my $rg = $self->update_args(%args);
 354     $self->begin_trans;
 355     IDZebra::repository_show($self->{zh});
 356     $self->_selectRecordGroup($self->{rg});
 357     $self->end_trans;
 358 }
 359
 360 sub update_args {
 361     my ($self, %args) = @_;
 362     my $rg = $self->_makeRecordGroup(%args);
 363     $self->_selectRecordGroup($rg);
 364     return ($rg);
 365 }
 366
 367 # -----------------------------------------------------------------------------
 368 # Per record update
 369 # -----------------------------------------------------------------------------
 370
 371 sub update_record {
 372     my ($self, %args) = @_;
 373     return(IDZebra::update_record($self->{zh},
 374                                   $self->record_update_args(%args)));
 375 }
 376
 377 sub delete_record {
 378     my ($self, %args) = @_;
 379     return(IDZebra::delete_record($self->{zh},
 380                                   $self->record_update_args(%args)));
 381 }
 382 sub record_update_args {
 383     my ($self, %args) = @_;
 384
 385     my $sysno   = $args{sysno}      ? $args{sysno}      : 0;
 386     my $match   = $args{match}      ? $args{match}      : "";
 387     my $rectype = $args{recordType} ? $args{recordType} : "";
 388     my $fname   = $args{file}       ? $args{file}       : "<no file>";
 389
 390     my $buff;
 391
 392     if ($args{data}) {
 393         $buff = $args{data};
 394     }
 395     elsif ($args{file}) {
 396         open (F, $args{file}) || warn ("Cannot open $args{file}");
 397         $buff = join('',(<F>));
 398         close (F);
 399     }
 400     my $len = length($buff);
 401
 402     delete ($args{sysno});
 403     delete ($args{match});
 404     delete ($args{recordType});
 405     delete ($args{file});
 406     delete ($args{data});
 407
 408     my $rg = $self->_makeRecordGroup(%args);
 409
 410     # If no record type is given, then try to find it out from the
 411     # file extension;
 412     unless ($rectype) {
 413         if (my ($ext) = $fname =~ /\.(\w+)$/) {
 414             my $rg2 = $self->_getRecordGroup($rg->{groupName},$ext);
 415             $rectype = $rg2->{recordType};
 416         }
 417     }
 418
 419     $rg->{databaseName} = "Default" unless ($rg->{databaseName});
 420
 421 #    print STDERR "$rectype,$sysno,$match,$fname,$len\n";
 422     unless ($rectype) {
 423         $rectype="";
 424     }
 425     return ($rg, $rectype, $sysno, $match, $fname, $buff, $len);
 426 }
 427
 428 # -----------------------------------------------------------------------------
 429 # Search
 430 # -----------------------------------------------------------------------------
 431 sub select_databases {
 432     my ($self, @databases) = @_;
 433     return (IDZebra::select_databases($self->{zh},
 434                                       ($#databases + 1),
 435                                       \@databases));
 436 }
 437
 438 sub search_pqf {
 439     my ($self, $query, $setname) = @_;
 440     my $hits = IDZebra::search_PQF($self->{zh},
 441                                    $self->{odr_input},
 442                                    $self->{odr_output},
 443                                    $query,
 444                                    $setname);
 445
 446     my $rs  = IDZebra::Resultset->new($self,
 447                                       name        => $setname,
 448                                       recordCount => $hits,
 449                                       errCode     => $self->errCode,
 450                                       errString   => $self->errString);
 451     return($rs);
 452 }
 453
 454 sub cqlmap {
 455     my ($self,$mapfile) = @_;
 456     if ($#_ > 0) {
 457         unless (-f $mapfile) {
 458             croak("Cannot find $mapfile");
 459         }
 460         if (defined ($self->{cql_ct})) {
 461           IDZebra::cql_transform_close($self->{cql_ct});
 462         }
 463         $self->{cql_ct} = IDZebra::cql_transform_open_fname($mapfile);
 464         $self->{cql_mapfile} = $mapfile;
 465     }
 466     return ($self->{cql_mapfile});
 467 }
 468
 469 sub cql2pqf {
 470     my ($self, $cqlquery) = @_;
 471     unless (defined($self->{cql_ct})) {
 472         croak("CQL map file is not specified yet.");
 473     }
 474     my $res = "\0" x 2048;
 475     my $r = IDZebra::cql2pqf($self->{cql_ct}, $cqlquery, $res, 2048);
 476     $res=~s/\0.+$//g;
 477     return ($res);
 478 }
 479
 480 sub search_cql {
 481     my ($self, $query, $transfile) = @_;
 482 }
 483
 484
 485 sub search_ccl {
 486     my ($self, $query, $transfile) = @_;
 487 }
 488
 489 # -----------------------------------------------------------------------------
 490 # Sort
 491 #
 492 # Sorting of multiple result sets is not supported by zebra...
 493 # -----------------------------------------------------------------------------
 494
 495 sub sortResultsets {
 496     my ($self, $sortspec, $setname, @sets) = @_;
 497
 498     my @setnames;
 499     my $count = 0;
 500     foreach my $rs (@sets) {
 501         push (@setnames, $rs->{name});
 502         $count += $rs->{recordCount};  # is this really sure ??? It doesn't
 503                                        # matter now...
 504     }
 505
 506     my $status = IDZebra::sort($self->{zh},
 507                                $self->{odr_output},
 508                                $sortspec,
 509                                $setname,
 510                                \@setnames);
 511
 512     my $errCode = $self->errCode;
 513     my $errString = $self->errString;
 514
 515     if ($status || $errCode) {$count = 0;}
 516
 517     my $rs  = IDZebra::Resultset->new($self,
 518                                       name        => $setname,
 519                                       recordCount => $count,
 520                                       errCode     => $errCode,
 521                                       errString   => $errString);
 522
 523     return ($rs);
 524 }
 525
 526
 527 __END__
 528
 529 =head1 NAME
 530
 531 IDZebra::Session - A Zebra database server session for update and retrieval
 532
 533 =head1 SYNOPSIS
 534
 535   $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
 536   $sess->open();
 537
 538   $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
 539
 540   $sess->close;
 541
 542 =head1 DESCRIPTION
 543
 544 Zebra is a high-performance, general-purpose structured text indexing and retrieval engine. It reads structured records in a variety of input formats (eg. email, XML, MARC) and allows access to them through exact boolean search expressions and relevance-ranked free-text queries.
 545
 546 Zebra supports large databases (more than ten gigabytes of data, tens of millions of records). It supports incremental, safe database updates on live systems. You can access data stored in Zebra using a variety of Index Data tools (eg. YAZ and PHP/YAZ) as well as commercial and freeware Z39.50 clients and toolkits.
 547
 548 =head1 OPENING AND CLOSING A ZEBRA SESSIONS
 549
 550 For the time beeing only local database services are supported, the same way as calling zebraidx or zebrasrv from the command shell. In order to open a local Zebra database, with a specific configuration file, use
 551
 552   $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
 553   $sess->open();
 554
 555 or
 556
 557   $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
 558
 559 where $sess is going to be the object representing a Zebra Session. Whenever this variable gets out of scope, the session is closed, together with all active transactions, etc... Anyway, if you'd like to close the session, just say:
 560
 561   $sess->close();
 562
 563 This will
 564   - close all transactions
 565   - destroy all result sets
 566   - close the session
 567
 568 In the future different database access methods are going to be available,
 569 like:
 570
 571   $sess = IDZebra::Session->open(server => 'ostrich.technomat.hu:9999');
 572
 573 You can also use the B<record group> arguments described below directly when calling the constructor, or the open method:
 574
 575   $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
 576                                  groupName  => 'demo');
 577
 578
 579 =head1 RECORD GROUPS
 580
 581 If you manage different sets of records that share common characteristics, you can organize the configuration settings for each type into "groups". See the Zebra manual on the configuration file (zebra.cfg).
 582
 583 For each open session a default record group is assigned. You can configure it in the constructor, or by the B<set_group> method:
 584
 585   $sess->group(groupName => ..., ...)
 586
 587 The following options are available:
 588
 589 =over 4
 590
 591 =item B<groupName>
 592
 593 This will select the named record group, and load the corresponding settings from the configuration file. All subsequent values will overwrite those...
 594
 595 =item B<databaseName>
 596
 597 The name of the (logical) database the updated records will belong to.
 598
 599 =item B<path>
 600
 601 This path is used for directory updates (B<update>, B<delete> methods);
 602
 603 =item B<recordId>
 604
 605 This option determines how to identify your records. See I<Zebra manual: Locating Records>
 606
 607 =item B<recordType>
 608
 609 The record type used for indexing.
 610
 611 =item B<flagStoreData>
 612
 613 Specifies whether the records should be stored internally in the Zebra system files. If you want to maintain the raw records yourself, this option should be false (0). If you want Zebra to take care of the records for you, it should be true(1).
 614
 615 =item B<flagStoreKeys>
 616
 617 Specifies whether key information should be saved for a given group of records. If you plan to update/delete this type of records later this should be specified as 1; otherwise it should be 0 (default), to save register space.
 618
 619 =item B<flagRw>
 620
 621 ?
 622
 623 =item B<fileVerboseLimit>
 624
 625 Skip log messages, when doing a directory update, and the specified number of files are processed...
 626
 627 =item B<databaseNamePath>
 628
 629 ?
 630
 631 =item B<explainDatabase>
 632
 633 The name of the explain database to be used
 634
 635 =item B<followLinks>
 636
 637 Follow links when doing directory update.
 638
 639 =back
 640
 641 You can use the same parameters calling all update methods.
 642
 643 =head1 TRANSACTIONS (WRITE LOCKS)
 644
 645 A transaction is a block of record update (insert / modify / delete) procedures. So, all call to such function will implicitly start a transaction, unless one is started by
 646
 647   $sess->begin_trans;
 648
 649 For multiple per record updates it's efficient to start transactions explicitly: otherwise registers (system files, vocabularies, etc..) are updated one by one. After finishing all requested updates, use
 650
 651   $stat = $sess->end_trans;
 652
 653 The return value is a ZebraTransactionStatus object, containing the following members as a hash reference:
 654
 655   $stat->{processed} # Number of records processed
 656   $stat->{updated}   # Number of records processed
 657   $stat->{deleted}   # Number of records processed
 658   $stat->{inserted}  # Number of records processed
 659   $stat->{stime}     # System time used
 660   $stat->{utime}     # User time used
 661
 662 =head1 UPDATING DATA
 663
 664 There are two ways to update data in a Zebra database using the perl API. You can update an entire directory structure just the way it's done by zebraidx:
 665
 666   $sess->update(path      =>  'lib');
 667
 668 This will update the database with the files in directory "lib", according to the current record group settings.
 669
 670   $sess->update();
 671
 672 This will update the database with the files, specified by the default record group setting. I<path> has to be specified there...
 673
 674   $sess->update(groupName => 'demo1',
 675                 path      =>  'lib');
 676
 677 Update the database with files in "lib" according to the settings of group "demo1"
 678
 679   $sess->delete(groupName => 'demo1',
 680                 path      =>  'lib');
 681
 682 Delete the records derived from the files in directory "lib", according to the "demo1" group settings. Sounds complex? Read zebra documentation about identifying records.
 683
 684 You can also update records one by one, even directly from the memory:
 685
 686   $sysno = $sess->update_record(data       => $rec1,
 687                                 recordType => 'grs.perl.pod',
 688                                 groupName  => "demo1");
 689
 690 This will update the database with the given record buffer. Note, that in this case recordType is explicitly specified, as there is no filename given, and for the demo1 group, no default record type is specified. The return value is the system assigned id of the record.
 691
 692 You can also index a single file:
 693
 694   $sysno = $sess->update_record(file => "lib/IDZebra/Data1.pm");
 695
 696 Or, provide a buffer, and a filename (where filename will only be used to identify the record, if configured that way, and possibly to find out it's record type):
 697
 698   $sysno = $sess->update_record(data => $rec1,
 699                                 file => "lib/IDZebra/Data1.pm");
 700
 701 And some crazy stuff:
 702
 703   $sysno = $sess->delete_record(sysno => $sysno);
 704
 705 where sysno in itself is sufficient to identify the record
 706
 707   $sysno = $sess->delete_record(data => $rec1,
 708                                 recordType => 'grs.perl.pod',
 709                                 groupName  => "demo1");
 710
 711 This case the record is extracted, and if already exists, located in the database, then deleted...
 712
 713   $sysno = $sess->delete_record(data       => $rec1,
 714                                 match      => $myid,
 715                                 recordType => 'grs.perl.pod',
 716                                 groupName  => "demo1");
 717
 718 Don't try this at home! This case, the record identifier string (which is normally generated according to the rules set in recordId directive of zebra.cfg) is provided directly....
 719
 720
 721 B<Important:> Note, that one record can be updated only once within a transaction - all subsequent updates are skipped.
 722
 723
 724 =head1 COPYRIGHT
 725
 726 Fill in
 727
 728 =head1 AUTHOR
 729
 730 Peter Popovics, pop@technomat.hu
 731
 732 =head1 SEE ALSO
 733
 734 IDZebra, IDZebra::Data1, Zebra documentation
 735
 736 =cut