perl/lib/IDZebra/Session.pm

   1 # $Id: Session.pm,v 1.23 2004-09-15 14:11:06 heikki Exp $
   2 #
   3 # Zebra perl API header
   4 # =============================================================================
   5 package IDZebra::Session;
   6
   7 use strict;
   8 use warnings;
   9 use Carp;
  10
  11 BEGIN {
  12     use IDZebra;
  13     use Scalar::Util;
  14     use IDZebra::Logger qw(:flags :calls);
  15     use IDZebra::Resultset;
  16     use IDZebra::ScanList;
  17     use IDZebra::RetrievalRecord;
  18     require Exporter;
  19     our $VERSION = do { my @r = (q$Revision: 1.23 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
  20     our @ISA = qw(IDZebra::Logger Exporter);
  21     our @EXPORT = qw (TRANS_RW TRANS_RO);
  22 }
  23
  # Transaction modes: begin_trans defaults to TRANS_RW when no mode is given.
  use constant {
      TRANS_RW => 1,
      TRANS_RO => 0,
  };
  26
  27 1;
  28 # -----------------------------------------------------------------------------
  29 # Class constructors, destructor
  30 # -----------------------------------------------------------------------------
  # Create a new, not yet opened session object.
  #
  # Arguments: the class name (or an existing object, whose class is reused)
  # followed by key/value options.  The options are kept untouched in
  # $self->{args}; open() falls back to them when it is called without
  # arguments.
  # Returns the blessed session object.
  sub new {
      my ($proto, %args) = @_;
      my $class = ref($proto) || $proto;

      my $self = {
          args        => \%args,
          cql_ct      => undef,
          cql_mapfile => "",
          # This initialisation used to sit after the return statement and
          # was therefore never executed.
          databases   => {},
      };

      return (bless ($self, $class));
  }
  44
  # Start the Zebra service unless this object already holds one.
  #
  # Arguments: optional key/value pairs; only 'configFile' is read here.  When
  # it is not given, "zebra.cfg" is used.
  # Returns the service handle stored in $self->{zs} (undef if
  # IDZebra::start returned undef).
  sub start_service {
      my ($self, %args) = @_;

      unless (defined($self->{zs})) {
          my $config = defined($args{'configFile'})
              ? $args{'configFile'}
              : "zebra.cfg";
          $self->{zs} = IDZebra::start($config);
      }
      return ($self->{zs});
  }
  57
  # Stop the service held in $self->{zs}, if any, and forget the handle.
  # IDZebra::stop is only called for a true handle value; the slot is reset
  # to undef whenever it was defined.
  sub stop_service {
      my $self = shift;
      if (defined $self->{zs}) {
          my $service = $self->{zs};
          IDZebra::stop($service) if ($service);
          $self->{zs} = undef;
      }
  }
  65
  66
  # Open a Zebra session.
  #
  # May be called as a class method (a new object is built from %args) or on
  # an existing object.  When no arguments are passed, the arguments stored by
  # new() are used.  Starts the service, opens the session handle if there is
  # none yet, resets the result set counter, selects the "Default" database,
  # applies the optional 'groupName' resource and the 'shadow' setting
  # (default 0), and makes sure the two ODR streams exist.
  # Croaks when the service could not be started.
  # Returns the session object.
  sub open {
      my ($proto, %args) = @_;

      my $self = ref($proto) ? $proto : $proto->new(%args);

      unless (%args) {
          %args = %{$self->{args}};
      }

      $self->start_service(%args);

      unless (defined($self->{zs})) {
          croak ("Falied to open zebra service");
      }

      unless (defined($self->{zh})) {
          $self->{zh}=IDZebra::open($self->{zs});
      }

      # Reset result set counter
      $self->{rscount} = 0;

      # This is needed in order to somehow initialize the service
      $self->databases("Default");

      # ADAM: the group() call was removed; the group resource is set instead
      if (defined($args{groupName})) {
          IDZebra::set_resource($self->{zh}, "group", $args{groupName});
      }

      # Set shadow usage
      my $shadow = defined($args{shadow}) ? $args{shadow} : 0;
      $self->shadow($shadow);

      # Allocate the ODR streams only when they are missing: calling open()
      # again on an already opened session used to overwrite (and thereby
      # leak) the streams created by the previous call.
      unless ($self->{odr_input}) {
          $self->{odr_input} = IDZebra::odr_createmem($IDZebra::ODR_DECODE);
      }
      unless ($self->{odr_output}) {
          $self->{odr_output} = IDZebra::odr_createmem($IDZebra::ODR_ENCODE);
      }

      return ($self);
  }
 113
 # Guard used by the public methods: croaks unless a session handle is
 # stored in $self->{zh}.
 sub checkzh {
     my $self = shift;
     croak ("Zebra session is not opened") unless defined($self->{zh});
 }
 120
 # Close the session.
 #
 # With an open handle: deletes all result sets, ends every transaction that
 # IDZebra::trans_no still reports (logging a warning for each), closes the
 # handle and forgets it.  Afterwards both ODR streams are reset, destroyed
 # and forgotten, and the service is stopped.
 sub close {
     my $self = shift;

     if ($self->{zh}) {
         my $stats = 0;
         # Delete all result sets (second argument 1 = Z_DeleteRequest_all)
         my $deleted = IDZebra::deleteResultSet($self->{zh}, 1, 0, [], $stats);

         while (IDZebra::trans_no($self->{zh}) > 0) {
             logf (LOG_WARN,"Explicitly closing transaction with session");
             $self->end_trans;
         }

         IDZebra::close($self->{zh});
         $self->{zh} = undef;
     }

     foreach my $stream (qw(odr_input odr_output)) {
         next unless ($self->{$stream});
         IDZebra::odr_reset($self->{$stream});
         IDZebra::odr_destroy($self->{$stream});
         $self->{$stream} = undef;
     }

     $self->stop_service;
 }
 156
 # Destructor: logs the event, closes the session and releases the CQL
 # transformation handle if one was opened.
 sub DESTROY {
     my $self = shift;
     logf (LOG_LOG,"DESTROY $self");
     $self->close;

     my $transform = $self->{cql_ct};
     if (defined ($transform)) {
         IDZebra::cql_transform_close($transform);
     }
 }
 167 # -----------------------------------------------------------------------------
 168 # Record group selection  This is a bit nasty... but used at many places
 169 # -----------------------------------------------------------------------------
 170
 171 # ADAM: All these group functions have been disabled.
 # Disabled record group accessor (kept under a *_deleted name).
 # With arguments it builds a record group from them, stores it in
 # $self->{rg} and selects it; it always returns $self->{rg}.
 # NOTE(review): calls _makeRecordGroup and _selectRecordGroup, which exist
 # in this file only under their *_deleted names.
 sub group_deleted {
     my ($self, %args) = @_;
     $self->checkzh;
     if (@_ > 1) {
         my $group = $self->_makeRecordGroup(%args);
         $self->{rg} = $group;
         $self->_selectRecordGroup($group);
     }
     return ($self->{rg});
 }
 181
 # Disabled: load the record group called $groupName, remember it in
 # $self->{rg} and select it.
 # NOTE(review): calls _getRecordGroup and _selectRecordGroup, which exist
 # in this file only under their *_deleted names.
 sub selectRecordGroup_deleted {
     my $self      = shift;
     my $groupName = shift;
     $self->checkzh;
     my $group = $self->_getRecordGroup($groupName);
     $self->{rg} = $group;
     $self->_selectRecordGroup($group);
 }
 188
 # Disabled debugging aid: print a separator line and then one "key:value"
 # line per record group field of $rg to STDERR.
 sub _displayRecordGroup_deleted {
     my ($self, $rg) = @_;
     print STDERR "-----\n";
     # The qw list needs its own parentheses: using qw(...) directly as the
     # foreach list is a syntax error on current perls.
     foreach my $key (qw (groupName
                          databaseName
                          path recordId
                          recordType
                          flagStoreData
                          flagStoreKeys
                          flagRw
                          fileVerboseLimit
                          databaseNamePath
                          explainDatabase
                          followLinks)) {
         print STDERR "$key:",$rg->{$key},"\n";
     }
 }
 206
 # Disabled: build a fresh, initialised record group and copy every record
 # group field of $orig that holds a true value into it.
 # Returns the new record group.
 sub _cloneRecordGroup_deleted {
     my ($self, $orig) = @_;
     my $rg = IDZebra::recordGroup->new();
     my $r = IDZebra::init_recordGroup($rg);
     # The qw list needs its own parentheses: using qw(...) directly as the
     # foreach list is a syntax error on current perls.
     foreach my $key (qw (groupName
                          databaseName
                          path
                          recordId
                          recordType
                          flagStoreData
                          flagStoreKeys
                          flagRw
                          fileVerboseLimit
                          databaseNamePath
                          explainDatabase
                          followLinks)) {
         $rg->{$key} = $orig->{$key} if ($orig->{$key});
     }
     return ($rg);
 }
 227
 # Disabled: create and initialise a record group, set its groupName when a
 # non-empty name is given, and let IDZebra::res_get_recordGroup fill it
 # using the session handle and the (possibly empty) extension $ext.
 # Returns the record group.
 sub _getRecordGroup_deleted {
     my ($self, $groupName, $ext) = @_;
     my $group  = IDZebra::recordGroup->new();
     my $status = IDZebra::init_recordGroup($group);
     if ($groupName ne "") {
         $group->{groupName} = $groupName;
     }
     $ext ||= "";
     $status = IDZebra::res_get_recordGroup($self->{zh}, $group, $ext);
     return ($group);
 }
 237
 # Disabled: derive a record group from %args.
 # Without arguments the current group ($self->{rg}) is returned.  Otherwise
 # the group named by 'groupName' is loaded, or the current group is cloned,
 # and the remaining options are applied on top of it.
 # NOTE(review): the helpers called here exist in this file only under
 # their *_deleted names.
 sub _makeRecordGroup_deleted {
     my ($self, %args) = @_;

     return ($self->{rg}) unless (%args);

     my $group = $args{groupName}
         ? $self->_getRecordGroup($args{groupName})
         : $self->_cloneRecordGroup($self->{rg});

     $self->_setRecordGroupOptions($group, %args);
     return ($group);
 }
 255
 # Disabled: copy every defined record group option from %args into $rg.
 # Only the fields listed below are considered; 'groupName' is not one of
 # them.
 sub _setRecordGroupOptions_deleted {
     my ($self, $rg, %args) = @_;

     # The qw list needs its own parentheses: using qw(...) directly as the
     # foreach list is a syntax error on current perls.
     foreach my $key (qw (databaseName
                          path
                          recordId
                          recordType
                          flagStoreData
                          flagStoreKeys
                          flagRw
                          fileVerboseLimit
                          databaseNamePath
                          explainDatabase
                          followLinks)) {
         if (defined ($args{$key})) {
             $rg->{$key} = $args{$key};
         }
     }
 }
 # Disabled: make $rg the active record group and select its database
 # ('Default' when the group names none).  Croaks when the database cannot
 # be selected.
 sub _selectRecordGroup_deleted {
     my ($self, $rg) = @_;

     my $status = IDZebra::set_group($self->{zh}, $rg);
     my $dbName = $rg->{databaseName} || 'Default';
     croak("Fatal error selecting database $dbName")
         unless ($self->databases($dbName));
 }
 287 # -----------------------------------------------------------------------------
 288 # Selecting databases for search (and also for updating - internally)
 289 # -----------------------------------------------------------------------------
 # Get or set the databases selected for this session.
 #
 # Without arguments: returns the names recorded in $self->{databases}.
 # With arguments: when the given set differs from the recorded one, the
 # databases are selected through IDZebra::select_databases.  On success the
 # new set is recorded and returned; on failure the error is logged, the
 # record is dropped and 0 is returned.
 sub databases {
     my ($self, @databases) = @_;

     $self->checkzh;

     unless (@databases) {
         return (keys(%{$self->{databases}}));
     }

     my %wanted = map { $_ => 1 } @databases;

     # Anything requested that is not selected yet, or selected but no
     # longer requested, counts as a change.
     my $changed = grep { !$self->{databases}{$_} } @databases;
     $changed   += grep { !$wanted{$_} } keys(%{$self->{databases}});

     if ($changed) {
         if (IDZebra::select_databases($self->{zh},
                                       scalar(@databases),
                                       \@databases)) {
             # Forget the recorded selection: it used to be overwritten
             # before the call, so a failed selection was remembered as
             # active and a retry with the same names was silently skipped.
             delete ($self->{databases});
             logf(LOG_FATAL,
                  "Could not select database(s) %s errCode=%d",
                  join(",",@databases),
                  $self->errCode());
             return (0);
         }

         $self->{databases} = { %wanted };
         logf(LOG_LOG,"Database(s) selected: %s",join(",",@databases));
     }
     return (keys(%{$self->{databases}}));
 }
 332
 333 # -----------------------------------------------------------------------------
 334 # Error handling
 335 # -----------------------------------------------------------------------------
 # Return what IDZebra::errCode reports for this session's handle.
 sub errCode {
     my $self = shift;
     return IDZebra::errCode($self->{zh});
 }
 340
 # Return what IDZebra::errString reports for this session's handle.
 sub errString {
     my $self = shift;
     return IDZebra::errString($self->{zh});
 }
 345
 # Return what IDZebra::errAdd reports for this session's handle.
 sub errAdd {
     my $self = shift;
     return IDZebra::errAdd($self->{zh});
 }
 350
 351 # -----------------------------------------------------------------------------
 352 # Transaction stuff
 353 # -----------------------------------------------------------------------------
 # Begin a transaction in mode $m (TRANS_RW when omitted).
 #
 # Croaks when the session is not opened, and when IDZebra::begin_trans
 # returns a true value: error code 2 is reported as a TRANS_RW requested
 # inside a TRANS_RO, anything else with its code and message.
 sub begin_trans {
     my ($self, $m) = @_;
     # Same guard as the other handle-using methods; it was missing here.
     $self->checkzh;
     $m = TRANS_RW unless (defined ($m));
     if (IDZebra::begin_trans($self->{zh},$m)) {
         if ($self->errCode == 2) {
             croak ("TRANS_RW not allowed within TRANS_RO");
         } else {
             croak("Error starting transaction; code:".
                   $self->errCode . " message: " . $self->errString);
         }
     }
 }
 366
 # End the current transaction.
 # Returns the IDZebra::ZebraTransactionStatus object that was handed to
 # IDZebra::end_trans.
 sub end_trans {
     my $self = shift;
     $self->checkzh;
     my $status = IDZebra::ZebraTransactionStatus->new();
     IDZebra::end_trans($self->{zh}, $status);
     return ($status);
 }
 374
 # Get or set the shadow register flag.
 # When a value is passed (undef counts as 0) it is applied through
 # IDZebra::set_shadow_enable; the current setting is returned either way.
 sub shadow {
     my ($self, $value) = @_;
     $self->checkzh;
     if (@_ > 1) {
         my $enable = defined($value) ? $value : 0;
         my $status = IDZebra::set_shadow_enable($self->{zh}, $enable);
     }
     return IDZebra::get_shadow_enable($self->{zh});
 }
 384
 # Commit the changes, but only while shadow usage is enabled; in that case
 # the result of IDZebra::commit is returned.
 sub commit {
     my $self = shift;
     $self->checkzh;
     return IDZebra::commit($self->{zh}) if ($self->shadow);
 }
 392
 393 # -----------------------------------------------------------------------------
 394 # We don't really need that...
 395 # -----------------------------------------------------------------------------
 # Reset one of the session's ODR streams; $name must be 'input' or
 # 'output', anything else croaks.
 sub odr_reset {
     my $self = shift;
     my $name = shift;
     croak("Undefined ODR '$name'") unless ($name =~ /^(input|output)$/);
     IDZebra::odr_reset($self->{"odr_$name"});
 }
 403
 404 # -----------------------------------------------------------------------------
 405 # Init/compact
 406 # -----------------------------------------------------------------------------
 # Call IDZebra::init on the open session handle and return its result.
 sub init {
     my $self = shift;
     $self->checkzh;
     return IDZebra::init($self->{zh});
 }
 412
 # Call IDZebra::compact on the open session handle and return its result.
 sub compact {
     my $self = shift;
     $self->checkzh;
     return IDZebra::compact($self->{zh});
 }
 418
 # Run IDZebra::repository_update on $args{path} inside its own transaction.
 # An optional 'groupName' is applied as the "group" resource first.
 # Returns whatever end_trans returns.
 # ADAM: the record group handling that used to surround the update has
 # been removed.
 sub update {
     my ($self, %args) = @_;
     $self->checkzh;

     my $group = $args{groupName};
     IDZebra::set_resource($self->{zh}, "group", $group) if (defined($group));

     $self->begin_trans;
     IDZebra::repository_update($self->{zh}, $args{path});
     $self->end_trans;
 }
 434
 # Run IDZebra::repository_delete on $args{path} inside its own transaction.
 # An optional 'groupName' is applied as the "group" resource first.
 # Returns whatever end_trans returns.
 # ADAM: the record group handling that used to surround the delete has
 # been removed.
 sub delete {
     my ($self, %args) = @_;
     $self->checkzh;

     my $group = $args{groupName};
     IDZebra::set_resource($self->{zh}, "group", $group) if (defined($group));

     $self->begin_trans;
     IDZebra::repository_delete($self->{zh}, $args{path});
     $self->end_trans;
 }
 451
 # Run IDZebra::repository_show inside its own transaction.
 # An optional 'groupName' is applied as the "group" resource first.
 # Returns whatever end_trans returns.
 sub show {
     my ($self, %args) = @_;
     $self->checkzh;
     # ADAM: Set group resource
     if (defined($args{groupName})) {
         IDZebra::set_resource($self->{zh}, "group", $args{groupName});
     }

     $self->begin_trans;
     IDZebra::repository_show($self->{zh});
     # The record group re-selection that followed here was dropped, as in
     # update() and delete(): _selectRecordGroup no longer exists, so the
     # call died and left the transaction open.
     $self->end_trans;
 }
 468
 # Disabled: build a record group from the given options, select it and
 # return it.
 # NOTE(review): calls _makeRecordGroup and _selectRecordGroup, which exist
 # in this file only under their *_deleted names.
 sub _update_args_deleted {
     my $self = shift;
     my %args = @_;
     my $group = $self->_makeRecordGroup(%args);
     $self->_selectRecordGroup($group);
     return ($group);
 }
 475
 476 # -----------------------------------------------------------------------------
 477 # Per record update
 478 # -----------------------------------------------------------------------------
 # Fetch the record text for the per-record update methods.
 #
 # Arguments are key/value pairs: a true 'data' value is returned as is;
 # otherwise the file named by 'file' is read completely.
 # Returns the text, undef when neither option is given, or the empty
 # string (after a warning) when the file cannot be opened.
 sub _get_data_buff {
     my %args=@_;
     my $buff;
     if ($args{data}) {
         $buff = $args{data};
     }
     elsif ($args{file}) {
         # Three-argument open with a lexical handle: the file name is taken
         # literally, so characters such as '>' or '|' in it cannot change
         # the open mode.  CORE:: is needed because this package defines its
         # own open() and close().
         if (CORE::open (my $fh, '<', $args{file})) {
             $buff = join('',(<$fh>));
             CORE::close ($fh);
         } else {
             warn ("Cannot open $args{file}");
             $buff = '';
         }
     }
     return $buff;
 }
 492
 # Insert a single record.
 #
 # Options: 'data' or 'file' (the record text, required), 'recordType',
 # 'force'.  Dies when no record text is available.
 # Returns the list produced by IDZebra::insert_record ($status, $sysno).
 sub insert_record {
     my ($self, %args) = @_;
     $self->checkzh;
     my $rectype = $args{recordType} || "";
     my $fname   = $args{file}       || "<no file>";
     my $force   = $args{force}      || 0;
     my $buff    = _get_data_buff(%args);
     die ("insert_record needs a {data} or a {file}") unless ($buff);
     # sysno 0 and an empty match string: a new record has neither
     my @result = IDZebra::insert_record($self->{zh}, $rectype, 0, "",
                                         $fname, $buff, length($buff),
                                         $force);
     return @result;
 }
 506
 # Update a single record.
 #
 # Options: 'data' or 'file' (the record text, required), 'sysno', 'match',
 # 'recordType', 'force'.  Dies when no record text is available.
 # Returns the list produced by IDZebra::update_record ($status, $sysno).
 sub update_record {
     my ($self, %args) = @_;
     $self->checkzh;
     my $sysno   = $args{sysno}      || 0;
     my $match   = $args{match}      || "";
     my $rectype = $args{recordType} || "";
     my $fname   = $args{file}       || "<no file>";
     my $force   = $args{force}      || 0;
     my $buff    = _get_data_buff(%args);
     die ("update_record needs a {data} or a {file}") unless ($buff);
     my @result = IDZebra::update_record($self->{zh}, $rectype, $sysno,
                                         $match, $fname, $buff,
                                         length($buff), $force);
     return @result;
 }
 522
 # Delete a single record: by sysno, by a given match string, or by
 # extracting keys from the record itself.
 #
 # Options: 'sysno', 'match', 'recordType', 'force', and optionally 'data'
 # or 'file' holding the record text (length 0 is passed without it).
 # Returns the list produced by IDZebra::delete_record.
 sub delete_record {
     my ($self, %args) = @_;
     $self->checkzh;
     my $sysno   = $args{sysno}      || 0;
     my $match   = $args{match}      || "";
     my $rectype = $args{recordType} || "";
     my $fname   = $args{file}       || "<no file>";
     my $force   = $args{force}      || 0;
     my $buff    = _get_data_buff(%args);
     my $len     = $buff ? length($buff) : 0;
     my @result = IDZebra::delete_record($self->{zh}, $rectype, $sysno,
                                         $match, $fname, $buff, $len,
                                         $force);
     return @result;
 }
 540
 # Turn the options of a per-record update call into the positional
 # argument list ($rectype, $sysno, $match, $fname, $buff, $len, $force).
 #
 # Options: 'sysno', 'match', 'recordType', 'file', 'data', 'force' and
 # 'groupName'; the latter is applied as the "group" resource of the
 # session.
 # ADAM: the record group is no longer part of the returned vector, and the
 # record type is no longer guessed from the file extension.
 sub _record_update_args {
     my ($self, %args) = @_;
     my $sysno   = $args{sysno}      ? $args{sysno}      : 0;
     my $match   = $args{match}      ? $args{match}      : "";
     my $rectype = $args{recordType} ? $args{recordType} : "";
     my $fname   = $args{file}       ? $args{file}       : "<no file>";
     my $force   = $args{force}      ? $args{force}      : 0;

     # Use the helper shared with the *_record methods instead of a second
     # copy of the same data/file reading code.
     my $buff = _get_data_buff(%args);
     my $len  = length($buff);

     # ADAM: set group resource
     if (defined($args{groupName})) {
         IDZebra::set_resource($self->{zh}, "group", $args{groupName});
     }

     return ($rectype, $sysno, $match, $fname, $buff, $len, $force);
 }
 593
 594 # -----------------------------------------------------------------------------
 595 # CQL stuff
 # Get or set the CQL mapping file.
 # When a file name different from the current one is passed, it must exist
 # (otherwise croak); a previously opened transformation is closed and a
 # new one is opened from the file.  Returns the current mapping file name.
 sub cqlmap {
     my ($self,$mapfile) = @_;
     if (@_ > 1 && $self->{cql_mapfile} ne $mapfile) {
         croak("Cannot find $mapfile") unless (-f $mapfile);
         IDZebra::cql_transform_close($self->{cql_ct})
             if (defined ($self->{cql_ct}));
         $self->{cql_ct} = IDZebra::cql_transform_open_fname($mapfile);
         $self->{cql_mapfile} = $mapfile;
     }
     return ($self->{cql_mapfile});
 }
 612
 # Transform a CQL query using the transformation opened by cqlmap().
 # Croaks when no mapping file has been set.
 # Returns ($pqf, $status): the text IDZebra::cql2pqf left in the result
 # buffer and the value it returned.
 sub cql2pqf {
     my ($self, $cqlquery) = @_;
     unless (defined($self->{cql_ct})) {
         croak("CQL map file is not specified yet.");
     }
     # Result buffer handed to the wrapper, pre-filled with NUL bytes
     my $res = "\0" x 2048;
     my $r = IDZebra::cql2pqf($self->{cql_ct}, $cqlquery, $res, 2048);
     # Cut at the first NUL.  The previous pattern (\0.+$) needed at least
     # one more non-newline character after the NUL, so a result followed by
     # a single NUL or by a newline kept its padding.
     $res =~ s/\0.*\z//s;
     return ($res,$r);
 }
 626
 627
 628 # -----------------------------------------------------------------------------
 629 # Search
 630 # -----------------------------------------------------------------------------
 # Run a search and return the resulting IDZebra::Resultset.
 #
 # Options:
 #   cqlmap    - CQL mapping file to activate first
 #   pqf       - the query in PQF; takes precedence over cql
 #   cql       - the query in CQL, transformed through cql2pqf
 #   databases - array reference of databases to search; the previous
 #               selection is restored afterwards
 #   rsname    - result set name (generated when omitted)
 #   sort      - sort specification applied to the result set unless the
 #               search reported an error
 # Croaks when no query is given or the CQL transformation yields nothing.
 sub search {
     my ($self, %args) = @_;

     $self->checkzh;

     $self->cqlmap($args{cqlmap}) if ($args{cqlmap});

     my $query;
     if ($args{pqf}) {
         $query = $args{pqf};
     }
     elsif ($args{cql}) {
         ($query, my $cqlstat) = $self->cql2pqf($args{cql});
         croak ("Failed to transform query: '$args{cql}', ".
                "status: ($cqlstat)") unless ($query);
     }
     croak ("No query given to search") unless ($query);

     my $searchdbs = $args{databases};
     my @origdbs;
     if ($searchdbs) {
         @origdbs = $self->databases;
         $self->databases(@{$searchdbs});
     }

     my $rsname = $args{rsname} || $self->_new_setname;

     my $rs = $self->_search_pqf($query, $rsname);

     $self->databases(@origdbs) if ($searchdbs);

     if ($args{sort}) {
         if ($rs->errCode) {
             carp("Sort skipped due to search error: ".
                  $rs->errCode);
         } else {
             $rs->sort($args{sort});
         }
     }

     return ($rs);
 }
 681
 # Produce the next generated result set name ("set_0", "set_1", ...) and
 # advance the per-session counter.
 sub _new_setname {
     my $self = shift;
     my $number = $self->{rscount}++;
     return ("set_" . $number);
 }
 686
 # Execute a PQF query under the result set name $setname and wrap the
 # outcome (hit count, error code and error string of the session) into a
 # new IDZebra::Resultset.
 sub _search_pqf {
     my ($self, $query, $setname) = @_;

     # Filled in by IDZebra::search_PQF through the reference
     my $hits = 0;
     my $res = IDZebra::search_PQF($self->{zh}, $query, $setname, \$hits);

     my @properties = (name        => $setname,
                       query       => $query,
                       recordCount => $hits,
                       errCode     => $self->errCode,
                       errString   => $self->errString);

     my $resultset = IDZebra::Resultset->new($self, @properties);
     return ($resultset);
 }
 706
 707 # -----------------------------------------------------------------------------
 708 # Sort
 709 #
 710 # Sorting of multiple result sets is not supported by zebra...
 711 # -----------------------------------------------------------------------------
 712
 # Sort a result set into a new one called $setname.
 #
 # Arguments: the sort specification, the name of the result set to create
 # and the result set objects to sort; more than one of them croaks.
 # Returns a new IDZebra::Resultset carrying the error state of the sort;
 # its record count is 0 when the sort failed.
 sub sortResultsets {
     my ($self, $sortspec, $setname, @sets) = @_;

     $self->checkzh;

     croak ("Sorting/merging of multiple resultsets is not supported now")
         if (@sets > 1);

     my @setnames = map { $_->{name} } @sets;

     # Is this really sure ??? It doesn't matter now...
     my $count = 0;
     $count += $_->{recordCount} foreach (@sets);

     my $status = IDZebra::sort($self->{zh},
                                $self->{odr_output},
                                $sortspec,
                                $setname,
                                \@setnames);

     my $errCode   = $self->errCode;
     my $errString = $self->errString;

     logf (LOG_LOG, "Sort status $setname: %d, errCode: %d, errString: %s",
           $status, $errCode, $errString);

     $count = 0 if ($status || $errCode);

     my $sorted = IDZebra::Resultset->new($self,
                                          name        => $setname,
                                          recordCount => $count,
                                          errCode     => $errCode,
                                          errString   => $errString);
     return ($sorted);
 }
 753 # -----------------------------------------------------------------------------
 754 # Scan
 755 # -----------------------------------------------------------------------------
 # Create an IDZebra::ScanList for this session from the given options.
 # The 'expression' option is required; without it the call croaks.
 sub scan {
     my ($self, %args) = @_;

     $self->checkzh;

     croak ("No scan expression given") unless ($args{expression});

     my $scanlist = IDZebra::ScanList->new($self,%args);
     return ($scanlist);
 }
 769
 770 # ============================================================================
 771
 772 __END__
 773
 774 =head1 NAME
 775
 776 IDZebra::Session - A Zebra database server session for update and retrieval
 777
 778 =head1 SYNOPSIS
 779
 780   $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
 781   $sess->open();
 782
 783   $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
 784                                  groupName  => 'demo1');
 785
 786   $sess->group(groupName => 'demo2');
 787
 788   $sess->init();
 789
 790   $sess->begin_trans;
 791
 792   $sess->update(path      =>  'lib');
 793
 794   my $s1=$sess->update_record(data       => $rec1,
 795                               recordType => 'grs.perl.pod'
 796                               );
 797
 798   my $stat = $sess->end_trans;
 799
 800   $sess->databases('demo1','demo2');
 801
 802   my $rs1 = $sess->search(cqlmap    => 'demo/cql.map',
 803                           cql       => 'dc.title=IDZebra',
 804                           databases => [qw(demo1 demo2)]);
 805   $sess->close;
 806
 807 =head1 DESCRIPTION
 808
 809 Zebra is a high-performance, general-purpose structured text indexing and retrieval engine. It reads structured records in a variety of input formats (eg. email, XML, MARC) and allows access to them through exact boolean search expressions and relevance-ranked free-text queries.
 810
 811 Zebra supports large databases (more than ten gigabytes of data, tens of millions of records). It supports incremental, safe database updates on live systems. You can access data stored in Zebra using a variety of Index Data tools (eg. YAZ and PHP/YAZ) as well as commercial and freeware Z39.50 clients and toolkits.
 812
 813 =head1 OPENING AND CLOSING ZEBRA SESSIONS
 814
 815 For the time being, only local database services are supported, the same way as calling zebraidx or zebrasrv from the command shell. In order to open a local Zebra database with a specific configuration file, use
 816
 817   $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
 818   $sess->open();
 819
 820 or
 821
 822   $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
 823
 824 where $sess is going to be the object representing a Zebra Session. Whenever this variable gets out of scope, the session is closed, together with all active transactions, etc... Anyway, if you'd like to close the session, just say:
 825
 826   $sess->close();
 827
 828 This will
 829   - close all transactions
 830   - destroy all result sets and scan lists
 831   - close the session
 832
 833 Note, that if I<shadow registers> are enabled, the changes will not be committed automatically.
 834
 835 In the future different database access methods are going to be available,
 836 like:
 837
 838   $sess = IDZebra::Session->open(server => 'ostrich.technomat.hu:9999');
 839
 840 You can also use the B<record group> arguments described below directly when calling the constructor, or the open method:
 841
 842   $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
 843                                  groupName  => 'demo');
 844
 845
 846 =head1 RECORD GROUPS
 847
 848 If you manage different sets of records that share common characteristics, you can organize the configuration settings for each type into "groups". See the Zebra manual on the configuration file (zebra.cfg).
 849
 850 For each open session a default record group is assigned. You can configure it in the constructor, or by the B<group> method:
 851
 852   $sess->group(groupName => ..., ...)
 853
 854 The following options are available:
 855
 856 =over 4
 857
 858 =item B<groupName>
 859
 860 This will select the named record group, and load the corresponding settings from the configuration file. All subsequent values will overwrite those...
 861
 862 =item B<databaseName>
 863
 864 The name of the (logical) database the updated records will belong to.
 865
 866 =item B<path>
 867
 868 This path is used for directory updates (B<update>, B<delete> methods);
 869
 870 =item B<recordId>
 871
 872 This option determines how to identify your records. See I<Zebra manual: Locating Records>
 873
 874 =item B<recordType>
 875
 876 The record type used for indexing.
 877
 878 =item B<flagStoreData>
 879
 880 Specifies whether the records should be stored internally in the Zebra system files. If you want to maintain the raw records yourself, this option should be false (0). If you want Zebra to take care of the records for you, it should be true(1).
 881
 882 =item B<flagStoreKeys>
 883
 884 Specifies whether key information should be saved for a given group of records. If you plan to update/delete this type of records later this should be specified as 1; otherwise it should be 0 (default), to save register space.
 885
 886 =item B<flagRw>
 887
 888 ?
 889
 890 =item B<fileVerboseLimit>
 891
 892 Skip log messages, when doing a directory update, and the specified number of files are processed...
 893
 894 =item B<databaseNamePath>
 895
 896 ?
 897
 898 =item B<explainDatabase>
 899
 900 The name of the explain database to be used
 901
 902 =item B<followLinks>
 903
 904 Follow links when doing directory update.
 905
 906 =back
 907
 908 You can use the same parameters calling all update methods.
 909
 910 =head1 TRANSACTIONS (READ / WRITE LOCKS)
 911
 912 A transaction is a block of record update (insert / modify / delete) or retrieval procedures. So, every call to such a function will implicitly start a transaction, unless one has already been started by
 913
 914   $sess->begin_trans;
 915
 916 or
 917
 918   $sess->begin_trans(TRANS_RW)
 919
 920 (these two are equivalent). The effect of this call is a kind of lock: a write lock is put on the registers, so other processes trying to update the database will be blocked. If there is already an RW (Read-Write) transaction opened by another process, the I<begin_trans> call will be blocked.
 921
 922 You can also use
 923
 924   $sess->begin_trans(TRANS_RO),
 925
 926 if you would like to put on a "read lock". This one is B<deprecated>, as while you have explicitly opened a transaction for read, you can't open another one for update. For example:
 927
 928   $sess->begin_trans(TRANS_RO);
 929   $sess->begin_trans(TRANS_RW); # invalid, dies here
 930   $sess->end_trans;
 931   $sess->end_trans;
 932
 933 is invalid, but
 934
 935   $sess->begin_trans(TRANS_RW);
 936   $sess->begin_trans(TRANS_RO);
 937   $sess->end_trans;
 938   $sess->end_trans;
 939
 940 is valid, but probably useless. Note again, that for each retrieval call, an RO transaction is opened. I<TRANS_RW> and I<TRANS_RO> are exported by default by IDZebra::Session.pm.
 941
 942 For multiple per-record I<updates> it's efficient to start transactions explicitly: otherwise registers (system files, vocabularies, etc..) are updated one by one. After finishing all requested updates, use
 943
 944   $stat = $sess->end_trans;
 945
 946 The return value is a ZebraTransactionStatus object, containing the following members as a hash reference:
 947
 948   $stat->{processed} # Number of records processed
 949   $stat->{updated}   # Number of records updated
 950   $stat->{deleted}   # Number of records deleted
 951   $stat->{inserted}  # Number of records inserted
 952   $stat->{stime}     # System time used
 953   $stat->{utime}     # User time used
 954
 955 Normally, if the perl code dies due to some runtime error, or the session is closed, then the API attempts to close all pending transactions.
 956
 957 =head1 THE SHADOW REGISTERS
 958
 959 The Zebra server supports updating of the index structures. That is, you can add, modify, or remove records from databases managed by Zebra without rebuilding the entire index. Since this process involves modifying structured files with various references between blocks of data in the files, the update process is inherently sensitive to system crashes, or to process interruptions: Anything but a successfully completed update process will leave the register files in an unknown state, and you will essentially have no recourse but to re-index everything, or to restore the register files from a backup medium. Further, while the update process is active, users cannot be allowed to access the system, as the contents of the register files may change unpredictably.
 960
 961 You can solve these problems by enabling the shadow register system in Zebra. During the updating procedure, zebraidx will temporarily write changes to the involved files in a set of "shadow files", without modifying the files that are accessed by the active server processes. If the update procedure is interrupted by a system crash or a signal, you simply repeat the procedure - the register files have not been changed or damaged, and the partially written shadow files are automatically deleted before the new updating procedure commences.
 962
 963 At the end of the updating procedure (or in a separate operation, if you so desire), the system enters a "commit mode". First, any active server processes are forced to access those blocks that have been changed from the shadow files rather than from the main register files; the unmodified blocks are still accessed at their normal location (the shadow files are not a complete copy of the register files - they only contain those parts that have actually been modified). If the commit process is interrupted at any point during the commit process, the server processes will continue to access the shadow files until you can repeat the commit procedure and complete the writing of data to the main register files. You can perform multiple update operations to the registers before you commit the changes to the system files, or you can execute the commit operation at the end of each update operation. When the commit phase has completed successfully, any running server processes are instructed to switch their operations to the new, operational register, and the temporary shadow files are deleted.
 964
 965 By default, (in the API !) the use of shadow registers is disabled. If zebra is configured that way (there is a "shadow" entry in zebra.cfg), then the shadow system can be enabled by calling:
 966
 967  $sess->shadow(1);
 968
 969 or disabled by
 970
 971  $sess->shadow(0);
 972
 973 If shadow system is enabled, then you have to commit changes you did, by calling:
 974
 975  $sess->commit;
 976
 977 Note, that you can also determine shadow usage in the session constructor:
 978
 979  $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
 980                                 shadow    => 1);
 981
 982 Changes to I<shadow> will not have effect, within a I<transaction> (ie.: a transaction is started either with shadow enabled or disabled). For more details, read Zebra documentation: I<Safe Updating - Using Shadow Registers>.
 983
 984 =head1 UPDATING DATA
 985
 986 There are two ways to update data in a Zebra database using the perl API. You can update an entire directory structure just the way it's done by zebraidx:
 987
 988   $sess->update(path      =>  'lib');
 989
 990 This will update the database with the files in directory "lib", according to the current record group settings.
 991
 992   $sess->update();
 993
 994 This will update the database with the files, specified by the default record group setting. I<path> has to be specified there...
 995
 996   $sess->update(groupName => 'demo1',
 997                 path      =>  'lib');
 998
 999 Update the database with files in "lib" according to the settings of group "demo1"
1000
1001   $sess->delete(groupName => 'demo1',
1002                 path      =>  'lib');
1003
1004 Delete the records derived from the files in directory "lib", according to the "demo1" group settings. Sounds complex? Read zebra documentation about identifying records.
1005
1006 You can also update records one by one, even directly from the memory:
1007
1008   $sysno = $sess->update_record(data       => $rec1,
1009                                 recordType => 'grs.perl.pod',
1010                                 groupName  => "demo1");
1011
1012 This will update the database with the given record buffer. Note, that in this case recordType is explicitly specified, as there is no filename given, and for the demo1 group, no default record type is specified. The return value is the system assigned id of the record.
1013
1014 You can also index a single file:
1015
1016   $sysno = $sess->update_record(file => "lib/IDZebra/Data1.pm");
1017
1018 Or, provide a buffer, and a filename (where filename will only be used to identify the record, if configured that way, and possibly to find out its record type):
1019
1020   $sysno = $sess->update_record(data => $rec1,
1021                                 file => "lib/IDZebra/Data1.pm");
1022
1023 And some crazy stuff:
1024
1025   $sysno = $sess->delete_record(sysno => $sysno);
1026
1027 where sysno in itself is sufficient to identify the record
1028
1029   $sysno = $sess->delete_record(data => $rec1,
1030                                 recordType => 'grs.perl.pod',
1031                                 groupName  => "demo1");
1032
1033 In this case the record is extracted and, if it already exists, located in the database, then deleted...
1034
1035   $sysno = $sess->update_record(data       => $rec1,
1036                                 match      => $myid,
1037                                 recordType => 'grs.perl.pod',
1038                                 groupName  => "demo1");
1039
1040 Don't try this at home! In this case, the record identifier string (which is normally generated according to the rules set in I<recordId> member of the record group, or in the I<recordId> parameter) is provided directly.... Looks much better this way:
1041
1042   $sysno = $sess->update_record(data          => $rec1,
1043                                 databaseName  => 'books',
1044                                 recordId      => '(bib1,ISBN)',
1045                                 recordType    => 'grs.perl.pod',
1046                                 flagStoreData => 1,
1047                                 flagStoreKeys => 1);
1048
1049 You can notice, that it's not necessary to define a record group in zebra.cfg: you can do it "on the fly" in your code.
1050
1051 B<Important:> Note, that one record can be updated only once within a transaction - all subsequent updates are skipped. If you'd like to override this feature, use the I<force=E<gt>1> flag:
1052
1053   $sysno = $sess->update_record(data       => $rec1,
1054                                 recordType => 'grs.perl.pod',
1055                                 groupName  => "demo1",
1056                                 force      => 1);
1057
1058 If you don't want to update the record if it already exists, use the I<insert_record> method:
1059
1060   $sysno = $sess->insert_record(data       => $rec1,
1061                                 recordType => 'grs.perl.pod',
1062                                 groupName  => "demo1");
1063
1064 In this case, sysno will be -1, if the record could not be added, because there was already one in the database, with the same record identifier (generated according to the I<recordId> setting).
1065
1066 =head1 DATABASE SELECTION
1067
1068 Within a zebra repository you can define logical databases. You can either do this by record groups, or by providing the databaseName argument for update methods. For each record the database name it belongs to is stored.
1069
1070 For searching, you can select databases by calling:
1071
1072   $sess->databases('db1','db2');
1073
1074 This will not do anything if the given and only the given databases are already selected. You can get the list of the actually selected databases, by calling:
1075
1076   @dblist = $sess->databases();
1077
1078 =head1 SEARCHING
1079
1080 It's nice to be able to store data in your repository... But it's useful to reach it as well. So this is how to do searching:
1081
1082   $rs = $sess->search(databases => [qw(demo1 demo2)], # optional
1083                       pqf       => '@attr 1=4 computer');
1084
1085 This is going to execute a search in databases demo1 and demo2, for title 'computer'. This is a PQF (Prefix Query Format) search, see YAZ documentation for details. The database selection is optional: if it's provided, the given list of databases is selected for this particular search, then the original selection is restored.
1086
1087 =head2 CCL searching
1088
1089 Not all users enjoy typing in prefix query structures and numerical attribute values, even in a minimalistic test client. In the library world, the more intuitive Common Command Language (or ISO 8777) has enjoyed some popularity - especially before the widespread availability of graphical interfaces. It is still useful in applications where you for some reason or other need to provide a symbolic language for expressing boolean query structures.
1090
1091 The CCL searching is not currently supported by this API.
1092
1093 =head2 CQL searching
1094
1095 CQL - Common Query Language - was defined for the SRW protocol. In many ways CQL has a similar syntax to CCL. The objective of CQL is different. Where CCL aims to be an end-user language, CQL is the protocol query language for SRW.
1096
1097 In order to map CQL queries to Zebra internal search structures, you have to define a mapping, the way it is described in YAZ documentation: I<Specification of CQL to RPN mapping>. The mapping is interpreted by the method:
1098
1099   $sess->cqlmap($mapfile);
1100
1101 Or, you can directly provide the I<mapfile> parameter for the search:
1102
1103   $rs = $sess->search(cqlmap    => 'demo/cql.map',
1104                       cql       => 'dc.title=IDZebra');
1105
1106 As you see, CQL searching is so simple: just give the query in the I<cql> parameter.
1107
1108 =head2 Sorting
1109
1110 If you'd like the search results to be sorted, use the I<sort> parameter:
1111
1112   $rs = $sess->search(cql       => 'IDZebra',
1113                       sort      => '1=4 ia');
1114
1115 Note, that B<currently> this is (almost) equivalent to
1116
1117   $rs = $sess->search(cql       => 'IDZebra');
1118   $rs->sort('1=4 ia');
1119
1120 but in future versions of Zebra and this API a single phase search and sort will take place, optimizing performance. For more details on sorting, see I<IDZebra::Resultset> manpage.
1121
1122 =head1 RESULTSETS
1123
1124 As you have seen, the result of the search request is a I<Resultset> object.
1125 It contains number of hits, and search status, and can be used to sort and retrieve the resulting records.
1126
1127   $count = $rs->count;
1128
1129   printf ("RS Status is %d (%s)\n", $rs->errCode, $rs->errString);
1130
1131 I<$rs-E<gt>errCode> is 0, if there were no errors during search. Read the I<IDZebra::Resultset> manpage for more details.
1132
1133 =head1 SCANNING
1134
1135 Zebra supports scanning index values. The result of the
1136
1137   $sl = $sess->scan(expression => "a");
1138
1139 call is an I<IDZebra::ScanList> object, which you can use to list the values. The scan expression has to be provided in a PQF like format. Examples:
1140
1141 B< a> (scan through words of "default", "Any" indexes)
1142
1143
1144 B< @attr 1=1016 a> (same effect)
1145
1146
1147 B< @attr 1=4 @attr 6=2 a>  (scan through titles as phrases)
1148
1149 An illegal scan expression will cause your code to die. If you'd like to select databases just for the scan call, you can optionally use the I<databases> parameter:
1150
1151   $sl = $sess->scan(expression => "a",
1152                     databases  => [qw(demo1 demo2)]);
1153
1154 You can use the I<IDZebra::ScanList> object returned by the I<scan> method, to reach the result. Check I<IDZebra::ScanList> manpage for more details.
1155
1156 =head1 SESSION STATUS AND ERRORS
1157
1158 Most of the API calls die if an error occurs. You can avoid this by using eval {} blocks. The following methods are available to get the status of Zebra service:
1159
1160 =over 4
1161
1162 =item B<errCode>
1163
1164 The Zebra provided error code... (for the result of the last call);
1165
1166 =item B<errString>
1167
1168 Error string corresponding to the message
1169
1170 =item B<errAdd>
1171
1172 Additional information for the status
1173
1174 =back
1175
1176 This functionality may change, see TODO.
1177
1178 =head1 LOGGING AND MISC. FUNCTIONS
1179
1180 Zebra provides a logging facility for the internal events, and also for application developers through the API. See manpage I<IDZebra::Logger> for details.
1181
1182 =over 4
1183
1184 =item B<IDZebra::LogFile($filename)>
1185
1186 Will set the output file for logging. By default it's STDERR;
1187
1188 =item B<IDZebra::LogLevel(15)>
1189
1190 Set log level. 0 for no logs. See IDZebra::Logger for usable flags.
1191
1192 =back
1193
1194 Some other functions
1195
1196 =over 4
1197
1198 =item B<$sess-E<gt>init>
1199
1200 Initialize, and clean registers. This will remove all data!
1201
1202 =item B<$sess-E<gt>compact>
1203
1204 Compact the registers (? does this work)
1205
1206 =item B<$sess-E<gt>show>
1207
1208 Doesn't have too much meaning. Don't try :)
1209
1210 =back
1211
1212 =head1 TODO
1213
1214 =over 4
1215
1216 =item B<Clean up error handling>
1217
1218 By default all zebra errors should cause die. (such situations could be avoided by using eval {}), and then check for errCode, errString... An optional flag or package variable should be introduced to override this, and skip zebra errors, to let the user decide what to do.
1219
1220 =item B<Make the package self-distributable>
1221
1222 Build and link with installed header and library files
1223
1224 =item B<Testing>
1225
1226 Test shadow system, unicode...
1227
1228 =item B<C API>
1229
1230 Cleanup, arrange, remove redundancy
1231
1232 =back
1233
1234 =head1 COPYRIGHT
1235
1236 Fill in
1237
1238 =head1 AUTHOR
1239
1240 Peter Popovics, pop@technomat.hu
1241
1242 =head1 SEE ALSO
1243
1244 Zebra documentation, IDZebra::Resultset, IDZebra::ScanList, IDZebra::Logger manpages
1245
1246 =cut