perl/lib/IDZebra/Session.pm

   1 # $Id: Session.pm,v 1.15 2003-03-13 04:25:18 pop Exp $
   2 #
   3 # Zebra perl API header
   4 # =============================================================================
   5 package IDZebra::Session;
   6
   7 use strict;
   8 use warnings;
   9
  10 BEGIN {
  11     use IDZebra;
  12     use Scalar::Util;
  13     use IDZebra::Logger qw(:flags :calls);
  14     use IDZebra::Resultset;
  15     use IDZebra::ScanList;
  16     use IDZebra::RetrievalRecord;
  17     require Exporter;
  18     our $VERSION = do { my @r = (q$Revision: 1.15 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
  19     our @ISA = qw(IDZebra::Logger Exporter);
  20     our @EXPORT = qw (TRANS_RW TRANS_RO);
  21 }
  22
  23 use constant TRANS_RW => 1;
  24 use constant TRANS_RO => 0;
  25
  26 1;
  27 # -----------------------------------------------------------------------------
  28 # Class constructors, destructor
  29 # -----------------------------------------------------------------------------
  30 sub new {
  31     my ($proto, %args) = @_;
  32     my $class = ref($proto) || $proto;
  33     my $self = {};
  34     $self->{args} = \%args;
  35
  36     bless ($self, $class);
  37     $self->{cql_ct} = undef;
  38     $self->{cql_mapfile} = "";
  39     return ($self);
  40
  41     $self->{databases} = {};
  42 }
  43
  44 sub start_service {
  45     my ($self, %args) = @_;
  46
  47     my $zs;
  48     unless (defined($self->{zs})) {
  49         if (defined($args{'configFile'})) {
  50             $self->{zs} = IDZebra::start($args{'configFile'});
  51         } else {
  52             $self->{zs} = IDZebra::start("zebra.cfg");
  53         }
  54     }
  55 }
  56
  57 sub stop_service {
  58     my ($self) = @_;
  59     if (defined($self->{zs})) {
  60         IDZebra::stop($self->{zs}) if ($self->{zs});
  61         $self->{zs} = undef;
  62     }
  63 }
  64
  65
  66 sub open {
  67     my ($proto,%args) = @_;
  68     my $self = {};
  69
  70     if (ref($proto)) { $self = $proto; } else {
  71         $self = $proto->new(%args);
  72     }
  73
  74     unless (%args) {
  75         %args = %{$self->{args}};
  76     }
  77
  78     $self->start_service(%args);
  79
  80     unless (defined($self->{zs})) {
  81         croak ("Falied to open zebra service");
  82     }
  83
  84     unless (defined($self->{zh})) {
  85         $self->{zh}=IDZebra::open($self->{zs});
  86     }
  87
  88     # Reset result set counter
  89     $self->{rscount} = 0;
  90
  91     # This is needed in order to somehow initialize the service
  92     $self->databases("Default");
  93
  94     # Load the default configuration
  95     $self->group(%args);
  96
  97
  98     # Set shadow usage
  99     my $shadow = defined($args{shadow}) ? $args{shadow} : 0;
 100     $self->shadow($shadow);
 101
 102     $self->{odr_input} = IDZebra::odr_createmem($IDZebra::ODR_DECODE);
 103     $self->{odr_output} = IDZebra::odr_createmem($IDZebra::ODR_ENCODE);
 104
 105     return ($self);
 106 }
 107
 108 sub checkzh {
 109     my ($self) = @_;
 110     unless (defined($self->{zh})) {
 111         croak ("Zebra session is not opened");
 112     }
 113 }
 114
 115 sub close {
 116     my ($self) = @_;
 117
 118     if ($self->{zh}) {
 119
 120         my $stats = 0;
 121         # Delete all resulsets
 122         my $r = IDZebra::deleteResultSet($self->{zh},
 123                                          1, #Z_DeleteRequest_all,
 124                                          0,[],
 125                                          $stats);
 126
 127         while (IDZebra::trans_no($self->{zh}) > 0) {
 128             logf (LOG_WARN,"Explicitly closing transaction with session");
 129             $self->end_trans;
 130         }
 131
 132         IDZebra::close($self->{zh});
 133         $self->{zh} = undef;
 134     }
 135
 136     if ($self->{odr_input}) {
 137         IDZebra::odr_reset($self->{odr_input});
 138         IDZebra::odr_destroy($self->{odr_input});
 139         $self->{odr_input} = undef;
 140     }
 141
 142     if ($self->{odr_output}) {
 143         IDZebra::odr_reset($self->{odr_output});
 144         IDZebra::odr_destroy($self->{odr_output});
 145         $self->{odr_output} = undef;
 146     }
 147
 148     $self->stop_service;
 149 }
 150
 151 sub DESTROY {
 152     my ($self) = @_;
 153     logf (LOG_LOG,"DESTROY $self");
 154     $self->close;
 155
 156     if (defined ($self->{cql_ct})) {
 157       IDZebra::cql_transform_close($self->{cql_ct});
 158     }
 159
 160 }
 161 # -----------------------------------------------------------------------------
 162 # Record group selection  This is a bit nasty... but used at many places
 163 # -----------------------------------------------------------------------------
 164 sub group {
 165     my ($self,%args) = @_;
 166     $self->checkzh;
 167     if ($#_ > 0) {
 168         $self->{rg} = $self->_makeRecordGroup(%args);
 169         $self->_selectRecordGroup($self->{rg});
 170     }
 171     return($self->{rg});
 172 }
 173
 174 sub selectRecordGroup {
 175     my ($self, $groupName) = @_;
 176     $self->checkzh;
 177     $self->{rg} = $self->_getRecordGroup($groupName);
 178     $self->_selectRecordGroup($self->{rg});
 179 }
 180
 181 sub _displayRecordGroup {
 182     my ($self, $rg) = @_;
 183     print STDERR "-----\n";
 184     foreach my $key qw (groupName
 185                         databaseName
 186                         path recordId
 187                         recordType
 188                         flagStoreData
 189                         flagStoreKeys
 190                         flagRw
 191                         fileVerboseLimit
 192                         databaseNamePath
 193                         explainDatabase
 194                         followLinks) {
 195         print STDERR "$key:",$rg->{$key},"\n";
 196     }
 197 }
 198
 199 sub _cloneRecordGroup {
 200     my ($self, $orig) = @_;
 201     my $rg = IDZebra::recordGroup->new();
 202     my $r = IDZebra::init_recordGroup($rg);
 203     foreach my $key qw (groupName
 204                         databaseName
 205                         path
 206                         recordId
 207                         recordType
 208                         flagStoreData
 209                         flagStoreKeys
 210                         flagRw
 211                         fileVerboseLimit
 212                         databaseNamePath
 213                         explainDatabase
 214                         followLinks) {
 215         $rg->{$key} = $orig->{$key} if ($orig->{$key});
 216     }
 217     return ($rg);
 218 }
 219
 220 sub _getRecordGroup {
 221     my ($self, $groupName, $ext) = @_;
 222     my $rg = IDZebra::recordGroup->new();
 223     my $r = IDZebra::init_recordGroup($rg);
 224     $rg->{groupName} = $groupName if ($groupName ne "");
 225     $ext = "" unless ($ext);
 226     $r = IDZebra::res_get_recordGroup($self->{zh}, $rg, $ext);
 227     return ($rg);
 228 }
 229
 230 sub _makeRecordGroup {
 231     my ($self, %args) = @_;
 232     my $rg;
 233
 234     my @keys = keys(%args);
 235     unless ($#keys >= 0) {
 236         return ($self->{rg});
 237     }
 238
 239     if ($args{groupName}) {
 240         $rg = $self->_getRecordGroup($args{groupName});
 241     } else {
 242         $rg = $self->_cloneRecordGroup($self->{rg});
 243     }
 244     $self->_setRecordGroupOptions($rg, %args);
 245     return ($rg);
 246 }
 247
 248 sub _setRecordGroupOptions {
 249     my ($self, $rg, %args) = @_;
 250
 251     foreach my $key qw (databaseName
 252                         path
 253                         recordId
 254                         recordType
 255                         flagStoreData
 256                         flagStoreKeys
 257                         flagRw
 258                         fileVerboseLimit
 259                         databaseNamePath
 260                         explainDatabase
 261                         followLinks) {
 262         if (defined ($args{$key})) {
 263             $rg->{$key} = $args{$key};
 264         }
 265     }
 266 }
 267 sub _selectRecordGroup {
 268     my ($self, $rg) = @_;
 269
 270     my $r = IDZebra::set_group($self->{zh}, $rg);
 271     my $dbName;
 272     unless ($dbName = $rg->{databaseName}) {
 273         $dbName = 'Default';
 274     }
 275     unless ($self->databases($dbName)) {
 276         croak("Fatal error selecting database $dbName");
 277     }
 278 }
 279 # -----------------------------------------------------------------------------
 280 # Selecting databases for search (and also for updating - internally)
 281 # -----------------------------------------------------------------------------
 282 sub databases {
 283     my ($self, @databases) = @_;
 284
 285     $self->checkzh;
 286
 287     unless ($#_ >0) {
 288         return (keys(%{$self->{databases}}));
 289     }
 290
 291     my %tmp;
 292     my $changed = 0;
 293     foreach my $db (@databases) {
 294         $tmp{$db}++;
 295         next if ($self->{databases}{$db});
 296         $changed++;
 297     }
 298
 299     foreach my $db (keys (%{$self->{databases}})) {
 300         $changed++ unless ($tmp{$db});
 301     }
 302
 303     if ($changed) {
 304
 305         delete ($self->{databases});
 306         foreach my $db (@databases) {
 307             $self->{databases}{$db}++;
 308         }
 309
 310         if (IDZebra::select_databases($self->{zh},
 311                                                 ($#databases + 1),
 312                                                 \@databases)) {
 313             logf(LOG_FATAL,
 314                  "Could not select database(s) %s errCode=%d",
 315                  join(",",@databases),
 316                  $self->errCode());
 317             return (0);
 318         } else {
 319             logf(LOG_LOG,"Database(s) selected: %s",join(",",@databases));
 320         }
 321     }
 322     return (keys(%{$self->{databases}}));
 323 }
 324
 325 # -----------------------------------------------------------------------------
 326 # Error handling
 327 # -----------------------------------------------------------------------------
 328 sub errCode {
 329     my ($self) = @_;
 330     return(IDZebra::errCode($self->{zh}));
 331 }
 332
 333 sub errString {
 334     my ($self) = @_;
 335     return(IDZebra::errString($self->{zh}));
 336 }
 337
 338 sub errAdd {
 339     my ($self) = @_;
 340     return(IDZebra::errAdd($self->{zh}));
 341 }
 342
 343 # -----------------------------------------------------------------------------
 344 # Transaction stuff
 345 # -----------------------------------------------------------------------------
 346 sub begin_trans {
 347     my ($self, $m) = @_;
 348     $m = TRANS_RW unless (defined ($m));
 349     if (my $err = IDZebra::begin_trans($self->{zh},$m)) {
 350         if ($self->errCode == 2) {
 351             croak ("TRANS_RW not allowed within TRANS_RO");
 352         } else {
 353             croak("Error starting transaction; code:".
 354                   $self->errCode . " message: " . $self->errString);
 355         }
 356     }
 357 }
 358
 359 sub end_trans {
 360     my ($self) = @_;
 361     $self->checkzh;
 362     my $stat = IDZebra::ZebraTransactionStatus->new();
 363     IDZebra::end_trans($self->{zh}, $stat);
 364     return ($stat);
 365 }
 366
 367 sub shadow {
 368     my ($self, $value) = @_;
 369     $self->checkzh;
 370     if ($#_ > 0) {
 371         $value = 0 unless (defined($value));
 372         my $r =IDZebra::set_shadow_enable($self->{zh},$value);
 373     }
 374     return (IDZebra::get_shadow_enable($self->{zh}));
 375 }
 376
 377 sub commit {
 378     my ($self) = @_;
 379     $self->checkzh;
 380     if ($self->shadow) {
 381         return(IDZebra::commit($self->{zh}));
 382     }
 383 }
 384
 385 # -----------------------------------------------------------------------------
 386 # We don't really need that...
 387 # -----------------------------------------------------------------------------
 388 sub odr_reset {
 389     my ($self, $name) = @_;
 390     if ($name !~/^(input|output)$/) {
 391         croak("Undefined ODR '$name'");
 392     }
 393   IDZebra::odr_reset($self->{"odr_$name"});
 394 }
 395
 396 # -----------------------------------------------------------------------------
 397 # Init/compact
 398 # -----------------------------------------------------------------------------
 399 sub init {
 400     my ($self) = @_;
 401     $self->checkzh;
 402     return(IDZebra::init($self->{zh}));
 403 }
 404
 405 sub compact {
 406     my ($self) = @_;
 407     $self->checkzh;
 408     return(IDZebra::compact($self->{zh}));
 409 }
 410
 411 sub update {
 412     my ($self, %args) = @_;
 413     $self->checkzh;
 414     my $rg = $self->_update_args(%args);
 415     $self->_selectRecordGroup($rg);
 416     $self->begin_trans;
 417     IDZebra::repository_update($self->{zh});
 418     $self->_selectRecordGroup($self->{rg});
 419     $self->end_trans;
 420 }
 421
 422 sub delete {
 423     my ($self, %args) = @_;
 424     $self->checkzh;
 425     my $rg = $self->_update_args(%args);
 426     $self->_selectRecordGroup($rg);
 427     $self->begin_trans;
 428     IDZebra::repository_delete($self->{zh});
 429     $self->_selectRecordGroup($self->{rg});
 430     $self->end_trans;
 431 }
 432
 433 sub show {
 434     my ($self, %args) = @_;
 435     $self->checkzh;
 436     my $rg = $self->_update_args(%args);
 437     $self->_selectRecordGroup($rg);
 438     $self->begin_trans;
 439     IDZebra::repository_show($self->{zh});
 440     $self->_selectRecordGroup($self->{rg});
 441     $self->end_trans;
 442 }
 443
 444 sub _update_args {
 445     my ($self, %args) = @_;
 446     my $rg = $self->_makeRecordGroup(%args);
 447     $self->_selectRecordGroup($rg);
 448     return ($rg);
 449 }
 450
 451 # -----------------------------------------------------------------------------
 452 # Per record update
 453 # -----------------------------------------------------------------------------
 454 sub insert_record {
 455     my ($self, %args) = @_;
 456     $self->checkzh;
 457     return(IDZebra::insert_record($self->{zh},
 458                                   $self->_record_update_args(%args)));
 459 }
 460
 461 sub update_record {
 462     my ($self, %args) = @_;
 463     $self->checkzh;
 464     return(IDZebra::update_record($self->{zh},
 465                                   $self->_record_update_args(%args)));
 466 }
 467
 468 sub delete_record {
 469     my ($self, %args) = @_;
 470     $self->checkzh;
 471     return(IDZebra::delete_record($self->{zh},
 472                                   $self->_record_update_args(%args)));
 473 }
 474
 475 sub _record_update_args {
 476     my ($self, %args) = @_;
 477
 478     my $sysno   = $args{sysno}      ? $args{sysno}      : 0;
 479     my $match   = $args{match}      ? $args{match}      : "";
 480     my $rectype = $args{recordType} ? $args{recordType} : "";
 481     my $fname   = $args{file}       ? $args{file}       : "<no file>";
 482     my $force   = $args{force}      ? $args{force}      : 0;
 483
 484     my $buff;
 485
 486     if ($args{data}) {
 487         $buff = $args{data};
 488     }
 489     elsif ($args{file}) {
 490         CORE::open (F, $args{file}) || warn ("Cannot open $args{file}");
 491         $buff = join('',(<F>));
 492         CORE::close (F);
 493     }
 494     my $len = length($buff);
 495
 496     delete ($args{sysno});
 497     delete ($args{match});
 498     delete ($args{recordType});
 499     delete ($args{file});
 500     delete ($args{data});
 501     delete ($args{force});
 502
 503     my $rg = $self->_makeRecordGroup(%args);
 504
 505     # If no record type is given, then try to find it out from the
 506     # file extension;
 507     unless ($rectype) {
 508         if (my ($ext) = $fname =~ /\.(\w+)$/) {
 509             my $rg2 = $self->_getRecordGroup($rg->{groupName},$ext);
 510             $rectype = $rg2->{recordType};
 511         }
 512     }
 513
 514     $rg->{databaseName} = "Default" unless ($rg->{databaseName});
 515
 516     unless ($rectype) {
 517         $rectype="";
 518     }
 519     return ($rg, $rectype, $sysno, $match, $fname, $buff, $len, $force);
 520 }
 521
 522 # -----------------------------------------------------------------------------
 523 # CQL stuff
 524 sub cqlmap {
 525     my ($self,$mapfile) = @_;
 526     if ($#_ > 0) {
 527         if ($self->{cql_mapfile} ne $mapfile) {
 528             unless (-f $mapfile) {
 529                 croak("Cannot find $mapfile");
 530             }
 531             if (defined ($self->{cql_ct})) {
 532               IDZebra::cql_transform_close($self->{cql_ct});
 533             }
 534             $self->{cql_ct} = IDZebra::cql_transform_open_fname($mapfile);
 535             $self->{cql_mapfile} = $mapfile;
 536         }
 537     }
 538     return ($self->{cql_mapfile});
 539 }
 540
 541 sub cql2pqf {
 542     my ($self, $cqlquery) = @_;
 543     unless (defined($self->{cql_ct})) {
 544         croak("CQL map file is not specified yet.");
 545     }
 546     my $res = "\0" x 2048;
 547     my $r = IDZebra::cql2pqf($self->{cql_ct}, $cqlquery, $res, 2048);
 548     if ($r) {
 549 #       carp ("Error transforming CQL query: '$cqlquery', status:$r");
 550     }
 551     $res=~s/\0.+$//g;
 552     return ($res,$r);
 553 }
 554
 555
 556 # -----------------------------------------------------------------------------
 557 # Search
 558 # -----------------------------------------------------------------------------
 559 sub search {
 560     my ($self, %args) = @_;
 561
 562     $self->checkzh;
 563
 564     if ($args{cqlmap}) { $self->cqlmap($args{cqlmap}); }
 565
 566     my $query;
 567     if ($args{pqf}) {
 568         $query = $args{pqf};
 569     }
 570     elsif ($args{cql}) {
 571         my $cqlstat;
 572         ($query, $cqlstat) =  $self->cql2pqf($args{cql});
 573         unless ($query) {
 574             croak ("Failed to transform query: '$args{cql}', ".
 575                    "status: ($cqlstat)");
 576         }
 577     }
 578     unless ($query) {
 579         croak ("No query given to search");
 580     }
 581
 582     my @origdbs;
 583
 584     if ($args{databases}) {
 585         @origdbs = $self->databases;
 586         $self->databases(@{$args{databases}});
 587     }
 588
 589     my $rsname = $args{rsname} ? $args{rsname} : $self->_new_setname;
 590
 591     my $rs = $self->_search_pqf($query, $rsname);
 592
 593     if ($args{databases}) {
 594         $self->databases(@origdbs);
 595     }
 596
 597     if ($args{sort}) {
 598         if ($rs->errCode) {
 599             carp("Sort skipped due to search error: ".
 600                  $rs->errCode);
 601         } else {
 602             $rs->sort($args{sort});
 603         }
 604     }
 605
 606     return ($rs);
 607 }
 608
 609 sub _new_setname {
 610     my ($self) = @_;
 611     return ("set_".$self->{rscount}++);
 612 }
 613
 614 sub _search_pqf {
 615     my ($self, $query, $setname) = @_;
 616
 617     my $hits = IDZebra::search_PQF($self->{zh},
 618                                    $self->{odr_input},
 619                                    $self->{odr_output},
 620                                    $query,
 621                                    $setname);
 622
 623     my $rs  = IDZebra::Resultset->new($self,
 624                                       name        => $setname,
 625                                       recordCount => $hits,
 626                                       errCode     => $self->errCode,
 627                                       errString   => $self->errString);
 628     return($rs);
 629 }
 630
 631 # -----------------------------------------------------------------------------
 632 # Sort
 633 #
 634 # Sorting of multiple result sets is not supported by zebra...
 635 # -----------------------------------------------------------------------------
 636
 637 sub sortResultsets {
 638     my ($self, $sortspec, $setname, @sets) = @_;
 639
 640     $self->checkzh;
 641
 642     if ($#sets > 0) {
 643         croak ("Sorting/merging of multiple resultsets is not supported now");
 644     }
 645
 646     my @setnames;
 647     my $count = 0;
 648     foreach my $rs (@sets) {
 649         push (@setnames, $rs->{name});
 650         $count += $rs->{recordCount};  # is this really sure ??? It doesn't
 651                                        # matter now...
 652     }
 653
 654     my $status = IDZebra::sort($self->{zh},
 655                                $self->{odr_output},
 656                                $sortspec,
 657                                $setname,
 658                                \@setnames);
 659
 660     my $errCode = $self->errCode;
 661     my $errString = $self->errString;
 662
 663     logf (LOG_LOG, "Sort status $setname: %d, errCode: %d, errString: %s",
 664           $status, $errCode, $errString);
 665
 666     if ($status || $errCode) {$count = 0;}
 667
 668     my $rs  = IDZebra::Resultset->new($self,
 669                                       name        => $setname,
 670                                       recordCount => $count,
 671                                       errCode     => $errCode,
 672                                       errString   => $errString);
 673
 674     return ($rs);
 675 }
 676 # -----------------------------------------------------------------------------
 677 # Scan
 678 # -----------------------------------------------------------------------------
 679 sub scan {
 680     my ($self, %args) = @_;
 681
 682     $self->checkzh;
 683
 684     unless ($args{expression}) {
 685         croak ("No scan expression given");
 686     }
 687
 688     my $sl = IDZebra::ScanList->new($self,%args);
 689
 690     return ($sl);
 691 }
 692
 693 # ============================================================================
 694
 695 __END__
 696
 697 =head1 NAME
 698
 699 IDZebra::Session - A Zebra database server session for update and retrieval
 700
 701 =head1 SYNOPSIS
 702
 703   $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
 704   $sess->open();
 705
 706   $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
 707                                  groupName  => 'demo1');
 708
 709   $sess->group(groupName => 'demo2');
 710
 711   $sess->init();
 712
 713   $sess->begin_trans;
 714
 715   $sess->update(path      =>  'lib');
 716
 717   my $s1=$sess->update_record(data       => $rec1,
 718                               recordType => 'grs.perl.pod',
 719                               groupName  => "demo1",
 720                               );
 721
 722   my $stat = $sess->end_trans;
 723
 724   $sess->databases('demo1','demo2');
 725
 726   my $rs1 = $sess->search(cqlmap    => 'demo/cql.map',
 727                           cql       => 'dc.title=IDZebra',
 728                           databases => [qw(demo1 demo2)]);
 729   $sess->close;
 730
 731 =head1 DESCRIPTION
 732
 733 Zebra is a high-performance, general-purpose structured text indexing and retrieval engine. It reads structured records in a variety of input formats (eg. email, XML, MARC) and allows access to them through exact boolean search expressions and relevance-ranked free-text queries.
 734
 735 Zebra supports large databases (more than ten gigabytes of data, tens of millions of records). It supports incremental, safe database updates on live systems. You can access data stored in Zebra using a variety of Index Data tools (eg. YAZ and PHP/YAZ) as well as commercial and freeware Z39.50 clients and toolkits.
 736
 737 =head1 OPENING AND CLOSING A ZEBRA SESSION
 738
 739 For the time being only local database services are supported, the same way as calling zebraidx or zebrasrv from the command shell. In order to open a local Zebra database with a specific configuration file, use
 740
 741   $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
 742   $sess->open();
 743
 744 or
 745
 746   $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
 747
 748 where $sess is going to be the object representing a Zebra Session. Whenever this variable goes out of scope, the session is closed, together with all active transactions, etc... Anyway, if you'd like to close the session explicitly, just say:
 749
 750   $sess->close();
 751
 752 This will
 753   - close all transactions
 754   - destroy all result sets and scan lists
 755   - close the session
 756
 757 Note, that if I<shadow registers> are enabled, the changes will not be committed automatically.
 758
 759 In the future different database access methods are going to be available,
 760 like:
 761
 762   $sess = IDZebra::Session->open(server => 'ostrich.technomat.hu:9999');
 763
 764 You can also use the B<record group> arguments described below directly when calling the constructor, or the open method:
 765
 766   $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
 767                                  groupName  => 'demo');
 768
 769
 770 =head1 RECORD GROUPS
 771
 772 If you manage different sets of records that share common characteristics, you can organize the configuration settings for each type into "groups". See the Zebra manual on the configuration file (zebra.cfg).
 773
 774 For each open session a default record group is assigned. You can configure it in the constructor, or by the B<group> method:
 775
 776   $sess->group(groupName => ..., ...)
 777
 778 The following options are available:
 779
 780 =over 4
 781
 782 =item B<groupName>
 783
 784 This will select the named record group, and load the corresponding settings from the configuration file. All subsequent values will overwrite those...
 785
 786 =item B<databaseName>
 787
 788 The name of the (logical) database the updated records will belong to.
 789
 790 =item B<path>
 791
 792 This path is used for directory updates (B<update>, B<delete> methods);
 793
 794 =item B<recordId>
 795
 796 This option determines how to identify your records. See I<Zebra manual: Locating Records>
 797
 798 =item B<recordType>
 799
 800 The record type used for indexing.
 801
 802 =item B<flagStoreData>
 803
 804 Specifies whether the records should be stored internally in the Zebra system files. If you want to maintain the raw records yourself, this option should be false (0). If you want Zebra to take care of the records for you, it should be true(1).
 805
 806 =item B<flagStoreKeys>
 807
 808 Specifies whether key information should be saved for a given group of records. If you plan to update/delete this type of records later this should be specified as 1; otherwise it should be 0 (default), to save register space.
 809
 810 =item B<flagRw>
 811
 812 ?
 813
 814 =item B<fileVerboseLimit>
 815
 816 Skip log messages, when doing a directory update, and the specified number of files are processed...
 817
 818 =item B<databaseNamePath>
 819
 820 ?
 821
 822 =item B<explainDatabase>
 823
 824 The name of the explain database to be used
 825
 826 =item B<followLinks>
 827
 828 Follow links when doing directory update.
 829
 830 =back
 831
 832 You can use the same parameters calling all update methods.
 833
 834 =head1 TRANSACTIONS (READ / WRITE LOCKS)
 835
 836 A transaction is a block of record update (insert / modify / delete) or retrieval procedures. So, every call to such a function will implicitly start a transaction, unless one is already started by
 837
 838   $sess->begin_trans;
 839
 840 or
 841
 842   $sess->begin_trans(TRANS_RW)
 843
 844 (these two are equivalent). The effect of this call is a kind of lock: a write lock is put on the registers, so other processes trying to update the database will be blocked. If there is already an RW (Read-Write) transaction opened by another process, the I<begin_trans> call will be blocked.
 845
 846 You can also use
 847
 848   $sess->begin_trans(TRANS_RO),
 849
 850 if you would like to put on a "read lock". This one is B<deprecated>, as while you have explicitly opened a transaction for read, you can't open another one for update. For example:
 851
 852   $sess->begin_trans(TRANS_RO);
 853   $sess->begin_trans(TRANS_RW); # invalid, die here
 854   $sess->end_trans;
 855   $sess->end_trans;
 856
 857 is invalid, but
 858
 859   $sess->begin_trans(TRANS_RW);
 860   $sess->begin_trans(TRANS_RO);
 861   $sess->end_trans;
 862   $sess->end_trans;
 863
 864 is valid, but probably useless. Note again, that for each retrieval call, an RO transaction is opened. I<TRANS_RW> and I<TRANS_RO> are exported by default by IDZebra::Session.pm.
 865
 866 For multiple per-record I<updates> it's efficient to start transactions explicitly: otherwise registers (system files, vocabularies, etc..) are updated one by one. After finishing all requested updates, use
 867
 868   $stat = $sess->end_trans;
 869
 870 The return value is a ZebraTransactionStatus object, containing the following members as a hash reference:
 871
 872   $stat->{processed} # Number of records processed
 873   $stat->{updated}   # Number of records updated
 874   $stat->{deleted}   # Number of records deleted
 875   $stat->{inserted}  # Number of records inserted
 876   $stat->{stime}     # System time used
 877   $stat->{utime}     # User time used
 878
 879 Normally, if the perl code dies due to some runtime error, or the session is closed, then the API attempts to close all pending transactions.
 880
 881 =head1 THE SHADOW REGISTERS
 882
 883 The Zebra server supports updating of the index structures. That is, you can add, modify, or remove records from databases managed by Zebra without rebuilding the entire index. Since this process involves modifying structured files with various references between blocks of data in the files, the update process is inherently sensitive to system crashes, or to process interruptions: Anything but a successfully completed update process will leave the register files in an unknown state, and you will essentially have no recourse but to re-index everything, or to restore the register files from a backup medium. Further, while the update process is active, users cannot be allowed to access the system, as the contents of the register files may change unpredictably.
 884
 885 You can solve these problems by enabling the shadow register system in Zebra. During the updating procedure, zebraidx will temporarily write changes to the involved files in a set of "shadow files", without modifying the files that are accessed by the active server processes. If the update procedure is interrupted by a system crash or a signal, you simply repeat the procedure - the register files have not been changed or damaged, and the partially written shadow files are automatically deleted before the new updating procedure commences.
 886
 887 At the end of the updating procedure (or in a separate operation, if you so desire), the system enters a "commit mode". First, any active server processes are forced to access those blocks that have been changed from the shadow files rather than from the main register files; the unmodified blocks are still accessed at their normal location (the shadow files are not a complete copy of the register files - they only contain those parts that have actually been modified). If the commit process is interrupted at any point during the commit process, the server processes will continue to access the shadow files until you can repeat the commit procedure and complete the writing of data to the main register files. You can perform multiple update operations to the registers before you commit the changes to the system files, or you can execute the commit operation at the end of each update operation. When the commit phase has completed successfully, any running server processes are instructed to switch their operations to the new, operational register, and the temporary shadow files are deleted.
 888
 889 By default (in the API!), the use of shadow registers is disabled. If Zebra is configured for it (there is a "shadow" entry in zebra.cfg), the shadow system can be enabled by calling:
 890
 891  $sess->shadow(1);
 892
 893 or disabled by
 894
 895  $sess->shadow(0);
 896
 897 If the shadow system is enabled, you have to commit the changes you made by calling:
 898
 899  $sess->commit;
 900
 901 Note, that you can also determine shadow usage in the session constructor:
 902
 903  $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
 904                                 shadow    => 1);
 905
 906 Changes to I<shadow> have no effect within a I<transaction> (i.e. a transaction is started either with shadow enabled or with shadow disabled). For more details, read the Zebra documentation: I<Safe Updating - Using Shadow Registers>.
 907
 908 =head1 UPDATING DATA
 909
 910 There are two ways to update data in a Zebra database using the perl API. You can update an entire directory structure just the way it's done by zebraidx:
 911
 912   $sess->update(path      =>  'lib');
 913
 914 This will update the database with the files in directory "lib", according to the current record group settings.
 915
 916   $sess->update();
 917
 918 This will update the database with the files, specified by the default record group setting. I<path> has to be specified there...
 919
 920   $sess->update(groupName => 'demo1',
 921                 path      =>  'lib');
 922
 923 Update the database with files in "lib" according to the settings of group "demo1"
 924
 925   $sess->delete(groupName => 'demo1',
 926                 path      =>  'lib');
 927
 928 Delete the records derived from the files in directory "lib", according to the "demo1" group settings. Sounds complex? Read zebra documentation about identifying records.
 929
 930 You can also update records one by one, even directly from the memory:
 931
 932   $sysno = $sess->update_record(data       => $rec1,
 933                                 recordType => 'grs.perl.pod',
 934                                 groupName  => "demo1");
 935
 936 This will update the database with the given record buffer. Note, that in this case recordType is explicitly specified, as there is no filename given, and for the demo1 group, no default record type is specified. The return value is the system assigned id of the record.
 937
 938 You can also index a single file:
 939
 940   $sysno = $sess->update_record(file => "lib/IDZebra/Data1.pm");
 941
 942 Or, provide a buffer, and a filename (where filename will only be used to identify the record, if configured that way, and possibly to find out its record type):
 943
 944   $sysno = $sess->update_record(data => $rec1,
 945                                 file => "lib/IDZebra/Data1.pm");
 946
 947 And some crazy stuff:
 948
 949   $sysno = $sess->delete_record(sysno => $sysno);
 950
 951 where sysno in itself is sufficient to identify the record
 952
 953   $sysno = $sess->delete_record(data => $rec1,
 954                                 recordType => 'grs.perl.pod',
 955                                 groupName  => "demo1");
 956
 957 In this case the record is extracted and, if it already exists, located in the database and then deleted...
 958
 959   $sysno = $sess->update_record(data       => $rec1,
 960                                 match      => $myid,
 961                                 recordType => 'grs.perl.pod',
 962                                 groupName  => "demo1");
 963
 964 Don't try this at home! In this case, the record identifier string (which is normally generated according to the rules set in the I<recordId> member of the record group, or in the I<recordId> parameter) is provided directly... It looks much better this way:
 965
 966   $sysno = $sess->update_record(data          => $rec1,
 967                                 databaseName  => 'books',
 968                                 recordId      => '(bib1,ISBN)',
 969                                 recordType    => 'grs.perl.pod',
 970                                 flagStoreData => 1,
 971                                 flagStoreKeys => 1);
 972
 973 You can notice, that it's not necessary to define a record group in zebra.cfg: you can do it "on the fly" in your code.
 974
 975 B<Important:> Note, that one record can be updated only once within a transaction - all subsequent updates are skipped. If you'd like to override this feature, use the I<force=E<gt>1> flag:
 976
 977   $sysno = $sess->update_record(data       => $rec1,
 978                                 recordType => 'grs.perl.pod',
 979                                 groupName  => "demo1",
 980                                 force      => 1);
 981
 982 If you don't want to update the record when it already exists, use the I<insert_record> method:
 983
 984   $sysno = $sess->insert_record(data       => $rec1,
 985                                 recordType => 'grs.perl.pod',
 986                                 groupName  => "demo1");
 987
 988 In this case, sysno will be -1, if the record could not be added, because there was already one in the database, with the same record identifier (generated according to the I<recordId> setting).
 989
 990 =head1 DATABASE SELECTION
 991
 992 Within a zebra repository you can define logical databases. You can either do this by record groups, or by providing the databaseName argument for update methods. For each record the database name it belongs to is stored.
 993
 994 For searching, you can select databases by calling:
 995
 996   $sess->databases('db1','db2');
 997
 998 This will not do anything if exactly the given databases are already selected. You can get the list of the currently selected databases by calling:
 999
1000   @dblist = $sess->databases();
1001
1002 =head1 SEARCHING
1003
1004 It's nice to be able to store data in your repository... But it's useful to reach it as well. So this is how to do searching:
1005
1006   $rs = $sess->search(databases => [qw(demo1 demo2)], # optional
1007                       pqf       => '@attr 1=4 computer');
1008
1009 This is going to execute a search in databases demo1 and demo2, for title 'computer'. This is a PQF (Prefix Query Format) search, see the YAZ documentation for details. The database selection is optional: if it's provided, the given list of databases is selected for this particular search, then the original selection is restored.
1010
1011 =head2 CCL searching
1012
1013 Not all users enjoy typing in prefix query structures and numerical attribute values, even in a minimalistic test client. In the library world, the more intuitive Common Command Language (or ISO 8777) has enjoyed some popularity - especially before the widespread availability of graphical interfaces. It is still useful in applications where you for some reason or other need to provide a symbolic language for expressing boolean query structures.
1014
1015 The CCL searching is not currently supported by this API.
1016
1017 =head2 CQL searching
1018
1019 CQL - Common Query Language - was defined for the SRW protocol. In many ways CQL has a similar syntax to CCL. The objective of CQL is different. Where CCL aims to be an end-user language, CQL is the protocol query language for SRW.
1020
1021 In order to map CQL queries to Zebra internal search structures, you have to define a mapping, the way it is described in YAZ documentation: I<Specification of CQL to RPN mapping>. The mapping is interpreted by the method:
1022
1023   $sess->cqlmap($mapfile);
1024
1025 Or, you can directly provide the I<mapfile> parameter for the search:
1026
1027   $rs = $sess->search(cqlmap    => 'demo/cql.map',
1028                       cql       => 'dc.title=IDZebra');
1029
1030 As you see, CQL searching is so simple: just give the query in the I<cql> parameter.
1031
1032 =head2 Sorting
1033
1034 If you'd like the search results to be sorted, use the I<sort> parameter:
1035
1036   $rs = $sess->search(cql       => 'IDZebra',
1037                       sort      => '1=4 ia');
1038
1039 Note, that B<currently> this is (almost) equivalent to
1040
1041   $rs = $sess->search(cql       => 'IDZebra');
1042   $rs->sort('1=4 ia');
1043
1044 but in future versions of Zebra and this API a single-phase search and sort will take place, optimizing performance. For more details on sorting, see the I<IDZebra::Resultset> manpage.
1045
1046 =head1 RESULTSETS
1047
1048 As you have seen, the result of the search request is a I<Resultset> object.
1049 It contains the number of hits and the search status, and can be used to sort and retrieve the resulting records.
1050
1051   $count = $rs->count;
1052
1053   printf ("RS Status is %d (%s)\n", $rs->errCode, $rs->errString);
1054
1055 I<$rs-E<gt>errCode> is 0, if there were no errors during search. Read the I<IDZebra::Resultset> manpage for more details.
1056
1057 =head1 SCANNING
1058
1059 Zebra supports scanning index values. The result of the
1060
1061   $sl = $sess->scan(expression => "a");
1062
1063 call is an I<IDZebra::ScanList> object, which you can use to list the values. The scan expression has to be provided in a PQF-like format. Examples:
1064
1065 B< a> (scan through words of "default", "Any" indexes)
1066
1067
1068 B< @attr 1=1016 a> (same effect)
1069
1070
1071 B< @attr 1=4 @attr 6=2 a>  (scan through titles as phrases)
1072
1073 An illegal scan expression will cause your code to die. If you'd like to select databases just for the scan call, you can optionally use the I<databases> parameter:
1074
1075   $sl = $sess->scan(expression => "a",
1076                     databases  => [qw(demo1 demo2)]);
1077
1078 You can use the I<IDZebra::ScanList> object returned by the I<scan> method to access the result. Check the I<IDZebra::ScanList> manpage for more details.
1079
1080 =head1 SESSION STATUS AND ERRORS
1081
1082 Most of the API calls die if an error occurs. You can avoid this by using eval {} blocks. The following methods are available to get the status of the Zebra service:
1083
1084 =over 4
1085
1086 =item B<errCode>
1087
1088 The Zebra provided error code... (for the result of the last call);
1089
1090 =item B<errString>
1091
1092 Error string corresponding to the error code
1093
1094 =item B<errAdd>
1095
1096 Additional information for the status
1097
1098 =back
1099
1100 This functionality may change, see TODO.
1101
1102 =head1 LOGGING AND MISC. FUNCTIONS
1103
1104 Zebra provides a logging facility for internal events, and also for application developers through the API. See the I<IDZebra::Logger> manpage for details.
1105
1106 =over 4
1107
1108 =item B<IDZebra::LogFile($filename)>
1109
1110 Will set the output file for logging. By default it's STDERR;
1111
1112 =item B<IDZebra::LogLevel(15)>
1113
1114 Set log level. 0 for no logs. See IDZebra::Logger for usable flags.
1115
1116 =back
1117
1118 Some other functions
1119
1120 =over 4
1121
1122 =item B<$sess-E<gt>init>
1123
1124 Initialize, and clean registers. This will remove all data!
1125
1126 =item B<$sess-E<gt>compact>
1127
1128 Compact the registers (? does this work)
1129
1130 =item B<$sess-E<gt>show>
1131
1132 Doesn't have too much meaning. Don't try :)
1133
1134 =back
1135
1136 =head1 TODO
1137
1138 =over 4
1139
1140 =item B<Clean up error handling>
1141
1142 By default all Zebra errors should cause die (such situations can be handled by using eval {} and then checking errCode, errString...). An optional flag or package variable should be introduced to override this and skip Zebra errors, letting the user decide what to do.
1143
1144 =item B<Make the package self-distributable>
1145
1146 Build and link with installed header and library files
1147
1148 =item B<Testing>
1149
1150 Test shadow system, unicode...
1151
1152 =item B<C API>
1153
1154 Cleanup, arrange, remove redundancy
1155
1156 =back
1157
1158 =head1 COPYRIGHT
1159
1160 Fill in
1161
1162 =head1 AUTHOR
1163
1164 Peter Popovics, pop@technomat.hu
1165
1166 =head1 SEE ALSO
1167
1168 Zebra documentation, IDZebra::Resultset, IDZebra::ScanList, IDZebra::Logger manpages
1169
1170 =cut