Updated documentation, README, scanning, sorting and filter virtual file handles...

author pop <pop>

Wed, 5 Mar 2003 13:55:21 +0000 (13:55 +0000)

committer pop <pop>

Wed, 5 Mar 2003 13:55:21 +0000 (13:55 +0000)
author pop <pop>
Wed, 5 Mar 2003 13:55:21 +0000 (13:55 +0000)
committer pop <pop>
Wed, 5 Mar 2003 13:55:21 +0000 (13:55 +0000)
diff --git a/perl/IDZebra.i b/perl/IDZebra.i

index b434490..70efba5 100644 (file)
--- a/perl/IDZebra.i
+++ b/perl/IDZebra.i
@@ -142,9 +142,6 @@ typedef struct {
    long stime;
  } ZebraTransactionStatus;
  
-
-
-
  /* == Module initialization and cleanup (zebra_perl.c) ===================== */
  
  void init (void);
@@ -244,13 +241,6 @@ void zebra_end_transaction (ZebraHandle zh, ZebraTransactionStatus *stat);
  %name(trans_no)
  int zebra_trans_no (ZebraHandle zh);
  
-%name(begin_read)          
-int zebra_begin_read (ZebraHandle zh);
-
-/* end retrieval (remove read lock) (zebraapi.c) */
-%name(end_read)            
-void zebra_end_read (ZebraHandle zh);
-
  /* commit changes from shadow (zebraapi.c) */
  %name(commit)              
  int  zebra_commit (ZebraHandle zh);
diff --git a/perl/IDZebra_wrap.c b/perl/IDZebra_wrap.c

index 22832f0..c4224ea 100644 (file)
--- a/perl/IDZebra_wrap.c
+++ b/perl/IDZebra_wrap.c
@@ -212,7 +212,7 @@ SWIG_TypeClientData(swig_type_info *ti, void *clientdata) {
   * perl5.swg
   *
   * Perl5 runtime library
- * $Header: /home/cvsroot/idis/perl/Attic/IDZebra_wrap.c,v 1.13 2003-03-05 00:35:57 pop Exp $
+ * $Header: /home/cvsroot/idis/perl/Attic/IDZebra_wrap.c,v 1.14 2003-03-05 13:55:21 pop Exp $
   * ----------------------------------------------------------------------------- */
  
  #define SWIGPERL
@@ -3707,66 +3707,6 @@ XS(_wrap_trans_no) {
  }
  
  
-XS(_wrap_begin_read) {
-    char _swigmsg[SWIG_MAX_ERRMSG] = "";
-    const char *_swigerr = _swigmsg;
-    {
-        ZebraHandle arg1 ;
-        int result;
-        int argvi = 0;
-        dXSARGS;
-        
-        if ((items < 1) || (items > 1)) {
-            SWIG_croak("Usage: begin_read(zh);");
-        }
-        {
-            ZebraHandle * argp;
-            if (SWIG_ConvertPtr(ST(0),(void **) &argp, SWIGTYPE_p_ZebraHandle,0) < 0) {
-                SWIG_croak("Type error in argument 1 of begin_read. Expected _p_ZebraHandle");
-            }
-            arg1 = *argp;
-        }
-        result = (int)zebra_begin_read(arg1);
-        
-        ST(argvi) = sv_newmortal();
-        sv_setiv(ST(argvi++), (IV) result);
-        XSRETURN(argvi);
-        fail:
-        (void) _swigerr;
-    }
-    croak(_swigerr);
-}
-
-
-XS(_wrap_end_read) {
-    char _swigmsg[SWIG_MAX_ERRMSG] = "";
-    const char *_swigerr = _swigmsg;
-    {
-        ZebraHandle arg1 ;
-        int argvi = 0;
-        dXSARGS;
-        
-        if ((items < 1) || (items > 1)) {
-            SWIG_croak("Usage: end_read(zh);");
-        }
-        {
-            ZebraHandle * argp;
-            if (SWIG_ConvertPtr(ST(0),(void **) &argp, SWIGTYPE_p_ZebraHandle,0) < 0) {
-                SWIG_croak("Type error in argument 1 of end_read. Expected _p_ZebraHandle");
-            }
-            arg1 = *argp;
-        }
-        zebra_end_read(arg1);
-        
-        
-        XSRETURN(argvi);
-        fail:
-        (void) _swigerr;
-    }
-    croak(_swigerr);
-}
-
-
  XS(_wrap_commit) {
      char _swigmsg[SWIG_MAX_ERRMSG] = "";
      const char *_swigerr = _swigmsg;
@@ -8512,8 +8452,6 @@ static swig_command_info swig_commands[] = {
  {"IDZebrac::begin_trans", _wrap_begin_trans},
  {"IDZebrac::end_trans", _wrap_end_trans},
  {"IDZebrac::trans_no", _wrap_trans_no},
-{"IDZebrac::begin_read", _wrap_begin_read},
-{"IDZebrac::end_read", _wrap_end_read},
  {"IDZebrac::commit", _wrap_commit},
  {"IDZebrac::get_shadow_enable", _wrap_get_shadow_enable},
  {"IDZebrac::set_shadow_enable", _wrap_set_shadow_enable},
diff --git a/perl/README b/perl/README

index 9020c6f..86deb92 100644 (file)
--- a/perl/README
+++ b/perl/README
@@ -1 +1,28 @@
-Write this
+IDZebra - Perl API for IndexData Zebra database server
+
+IDZebra package is distributed as a module, but also included in Zebra
+source distribution. 
+
+To compile this extension, run:
+ 
+  perl Makefile.PL 
+  make
+
+To test:
+
+  make test
+
+or
+
+  make test TEST_VERBOSE=1
+
+If the tests were successful, you may want to:
+
+  make install
+ 
+There is an embedded POD manual page in lib/IDZebra/Session.pm.
+ 
+The original author of the module is Peter Popovics <pop@technomat.hu>.
+The maintenance is done by IndexData (http://www.indexdata.dk); you may
+subscribe to zebralist@indexdata.dk.
+
diff --git a/perl/demo/index.sh b/perl/demo/index.sh

index aa2678b..9fa4c36 100755 (executable)
--- a/perl/demo/index.sh
+++ b/perl/demo/index.sh
@@ -6,5 +6,7 @@ fi
  test -d demo/register || mkdir demo/register
  test -d demo/lock || mkdir demo/lock
  test -d demo/tmp || mkdir demo/tmp
+test -d demo/shadow || mkdir demo/shadow
+
  ../index/zebraidx -c demo/zebra.cfg init
-../index/zebraidx -c demo/zebra.cfg -g demo1 update /usr/lib/perl5
+../index/zebraidx -c demo/zebra.cfg -g demo1 -n update /usr/lib/perl5
diff --git a/perl/demo/zebra.cfg b/perl/demo/zebra.cfg

index 5921638..f7b6625 100644 (file)
--- a/perl/demo/zebra.cfg
+++ b/perl/demo/zebra.cfg
@@ -6,7 +6,9 @@ profilePath: blib/lib:blib/arch:blib/lib:demo/:../tab:../../yaz/tab
  attset: bib1.att
  attset: explain.att
  
-register: demo/register:1000M
+register: demo/register:100M
+shadow: demo/shadow:100M
+
  lockDir: demo/lock
  setTmpDir: demo/tmp
  keyTmpDir: demo/tmp
diff --git a/perl/lib/IDZebra.pm b/perl/lib/IDZebra.pm

index 8253bb5..8540a01 100644 (file)
--- a/perl/lib/IDZebra.pm
+++ b/perl/lib/IDZebra.pm
@@ -56,8 +56,6 @@ package IDZebra;
  *begin_trans = *IDZebrac::begin_trans;
  *end_trans = *IDZebrac::end_trans;
  *trans_no = *IDZebrac::trans_no;
-*begin_read = *IDZebrac::begin_read;
-*end_read = *IDZebrac::end_read;
  *commit = *IDZebrac::commit;
  *get_shadow_enable = *IDZebrac::get_shadow_enable;
  *set_shadow_enable = *IDZebrac::set_shadow_enable;
diff --git a/perl/lib/IDZebra/Filter.pm b/perl/lib/IDZebra/Filter.pm

index 43d4cc8..ddcda51 100644 (file)
--- a/perl/lib/IDZebra/Filter.pm
+++ b/perl/lib/IDZebra/Filter.pm
@@ -148,9 +148,13 @@ sub getc {
  
  sub get_fh {
      my ($self) = @_;
-    my $fh = gensym;
-    tie (*$fh,'IDZebra::FilterFile', $self);
-    return ($fh);
+    if ($self->{testh}) {
+       return ($self->{testh});
+    } else {
+       my $fh = gensym;
+       tie (*$fh,'IDZebra::FilterFile', $self);
+       return ($fh);
+    }
  }
  
  sub readall {
@@ -292,27 +296,48 @@ where 'meta' is the abstract syntax identifier (in this case Zebra will try to l
  
  In order to get the input stream, you can use "virtual" file operators (as the source is not necessairly a file):
  
-=item readf($buf,$len,$offset)
+=over 4
+
+=item B<readf($buf,$len,$offset)>
  
  Going to read $len bytes of data from offset $offset into $buff
  
-=item readall($bufflen)
+=item B<readline()>
+
+Read one line
+
+=item B<getc()>
+
+Get one character (byte)
+
+=item B<readall($bufflen)>
  
  Read the entire stream, by reading $bufflen bytes at once
  
-=item seekf($offset)
+=item B<seekf($offset)>
  
  Position to $offset
  
-=item tellf
+=item B<tellf()>
  
  Tells the current offset (?)
  
-=item endf($offset)
+=item B<endf($offset)>
  
  ???
  
-Optionally, you can implement an init call for your class. This call is not going to be called in object, but in class context. Stupid, eh?
+=back
+
+You can optionally get a virtual perl filehandle as well:
+
+  my $fh = $self->get_fh();
+  while (<$fh>) {
+    # ...
+  }
+
+Note, that the virtual filehandle implementation is not finished yet, so some applications may have problems using that. See TODO.
+
+You can implement an init call for your class. This call is not going to be called in object, but in class context. Stupid, eh?
  
  =head1 TEST YOUR PERL FILTER
  
@@ -332,6 +357,14 @@ This will try to apply the filter on the file provided as argument, and display
  This is quite simple. Read the Zebra manual, and follow the instructions to create your zebra.cfg. For your I<recordType> choose 'grs.perl.<YourFilterClass>'. 
  Copy your filter module (YourFilterClass.pm) to a directory listed in I<profilePath>. i<profilePath> is added to @INC, when interpreting your package: so if you need to load modules from different locations than the default perl include path, just add these directories.
  
+=head1 MISC OPTIONS
+
+By default, filter code (process method) is executed within an eval {} block, and only a warning is sent to the log, if there is an error. To turn this option off, set B<$IDZebra::Filter::SAFE_MODE> to B<0>;
+
+=head1 TODO
+
+Finish virtual (tied) filehandle methods (SEEK, EOF, TELL);
+
  =head1 COPYRIGHT
  
  Fill in
diff --git a/perl/lib/IDZebra/Resultset.pm b/perl/lib/IDZebra/Resultset.pm

index 7fdbc5a..80fabdb 100644 (file)
--- a/perl/lib/IDZebra/Resultset.pm
+++ b/perl/lib/IDZebra/Resultset.pm
@@ -1,4 +1,4 @@
-# $Id: Resultset.pm,v 1.7 2003-03-03 18:27:25 pop Exp $
+# $Id: Resultset.pm,v 1.8 2003-03-05 13:55:22 pop Exp $
  # 
  # Zebra perl API header
  # =============================================================================
@@ -12,7 +12,7 @@ BEGIN {
      use IDZebra::Logger qw(:flags :calls);
      use Scalar::Util qw(weaken);
      use Carp;
-    our $VERSION = do { my @r = (q$Revision: 1.7 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; 
+    our $VERSION = do { my @r = (q$Revision: 1.8 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; 
      our @ISA = qw(IDZebra::Logger);
  }
  
@@ -223,7 +223,23 @@ The record syntax for retrieval. The default is SUTRS.
  
  =head1 SORTING
  
+You can sort resultsets by calling:
  
+  $rs1->sort($sort_expr);
+
+or create a new sorted resultset:
+
+  $rs2 = $rs1->sort($sort_expr);
+
+The sort expression has the same format as described in the I<yaz_client> documentation. For example:
+
+  $rs1->sort('1=4 id');
+
+will sort the results by title, in a case insensitive way, in descending order, while
+
+  $rs1->sort('1=4 a');
+
+will sort ascending by titles.
  
  =head1 COPYRIGHT
  
@@ -235,6 +251,6 @@ Peter Popovics, pop@technomat.hu
  
  =head1 SEE ALSO
  
-IDZebra, IDZebra::Data1, Zebra documentation
+Zebra documentation, IDZebra::Resultset, IDZebra::RetrievalRecord manpages.
  
  =cut
diff --git a/perl/lib/IDZebra/RetrievalRecord.pm b/perl/lib/IDZebra/RetrievalRecord.pm

index efbf906..294fae5 100644 (file)
--- a/perl/lib/IDZebra/RetrievalRecord.pm
+++ b/perl/lib/IDZebra/RetrievalRecord.pm
@@ -1,4 +1,4 @@
-# $Id: RetrievalRecord.pm,v 1.2 2003-03-03 12:14:27 pop Exp $
+# $Id: RetrievalRecord.pm,v 1.3 2003-03-05 13:55:22 pop Exp $
  # 
  # Zebra perl API header
  # =============================================================================
@@ -9,7 +9,7 @@ use warnings;
  
  BEGIN {
      use IDZebra;
-    our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; 
+    our $VERSION = do { my @r = (q$Revision: 1.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; 
  }
  
  1;
@@ -105,7 +105,7 @@ Peter Popovics, pop@technomat.hu
  
  =head1 SEE ALSO
  
-IDZebra, IDZebra::Session, IDZebra::Resultset, Zebra documentation
+Zebra documentation, IDZebra::Session, IDZebra::Resultset manpages.
  
  =cut
  
diff --git a/perl/lib/IDZebra/ScanEntry.pm b/perl/lib/IDZebra/ScanEntry.pm

index 1ee9779..eddf3a6 100644 (file)
--- a/perl/lib/IDZebra/ScanEntry.pm
+++ b/perl/lib/IDZebra/ScanEntry.pm
@@ -1,4 +1,4 @@
-# $Id: ScanEntry.pm,v 1.1 2003-03-04 19:33:52 pop Exp $
+# $Id: ScanEntry.pm,v 1.2 2003-03-05 13:55:22 pop Exp $
  # 
  # Zebra perl API header
  # =============================================================================
@@ -12,7 +12,7 @@ BEGIN {
      use IDZebra::Logger qw(:flags :calls);
      use Scalar::Util qw(weaken);
      use Carp;
-    our $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; 
+    our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; 
  }
  
  1;
@@ -60,10 +60,38 @@ IDZebra::ScanEntry - An entry of the scan results
  
  =head1 SYNOPSIS
  
+  foreach my $se ($sl->entries()) {
+      print STDERR ($se->position ,": ",
+                   $se->term() . " (",
+                   $se->occurrences() . ")\n");
+  }
+ 
  =head1 DESCRIPTION
  
+A scan entry describes occurrence of a term in the scanned index.
+
  =head1 PROPERTIES
  
+=over 4
+
+=item B<term>
+
+The term itself.
+
+=item B<position>
+
+Position of term in the list. 1 based.
+
+=item B<occurrences>
+
+The occurrence count of the term in the selected database(s).
+
+=back 
+
+=head1 TODO
+
+A I<resultSet> and maybe a I<records> method, to reach the records, where the term occurred.
+
  =head1 COPYRIGHT
  
  Fill in
diff --git a/perl/lib/IDZebra/ScanList.pm b/perl/lib/IDZebra/ScanList.pm

index 155b1ba..9383f00 100644 (file)
--- a/perl/lib/IDZebra/ScanList.pm
+++ b/perl/lib/IDZebra/ScanList.pm
@@ -1,4 +1,4 @@
-# $Id: ScanList.pm,v 1.1 2003-03-04 19:33:52 pop Exp $
+# $Id: ScanList.pm,v 1.2 2003-03-05 13:55:22 pop Exp $
  # 
  # Zebra perl API header
  # =============================================================================
@@ -13,7 +13,7 @@ BEGIN {
      use IDZebra::ScanEntry;
      use Scalar::Util qw(weaken);
      use Carp;
-    our $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; 
+    our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; 
      our @ISA = qw(IDZebra::Logger);
  }
  
@@ -141,10 +141,60 @@ IDZebra::ScanList - Scan results
  
  =head1 SYNOPSIS
  
+  $sl = $sess->scan(expression => "\@attr 1=4 \@attr 6=2 a",
+                   databases => [qw(demo1 demo2)]);
+
+  @entries = $sl->entries(position    => 5,
+                         num_entries => 10);
+
+  print STDERR 
+    $sl->num_entries,',',
+    $sl->is_partial,',',
+    $sl->position;
+
+
  =head1 DESCRIPTION
  
+The scan list object is the result of a scan call, and can be used to retrieve entries from the list. To do this, use the B<entries> method,
+
+  @entries = $sl->entries(position    => 5,
+                         num_entries => 10);
+
+returning an array of I<IDZebra::ScanEntry> objects. 
+The possible arguments are:
+
+=over 4
+
+=item B<position>
+
+The requested position of the scanned term in the returned list. For example, if position 5 is given, and the scan term is "a", then the entry corresponding to term "a" will be on the position 5 of the list (4th. element of the array). It may happen, that due to the position of term in the whole index, it's not possible to put the entry on the requested position (for example, the term is on the 2nd position of the index); in this case I<$sl-E<gt>position> will contain a different value, presenting the actual position. The default value is 1.
+
+=item B<num_entries>
+
+The requested number of entries in the list. See I<$sl-E<gt>num_entries> for the actual number of fetched entries. The default value is 20.
+
+=back
+
  =head1 PROPERTIES
  
+You can reach the following properties as function calls on the IDZebra::ScanList object:
+
+=over 4
+
+=item B<position>
+
+After calling I<entries>, the actual position of the requested term.
+
+=item B<num_entries>
+
+After calling I<entries>, the actual number of entries returned.
+
+=item B<is_partial>
+
+Only partial list is returned by I<entries>.
+
+=back
+
  =head1 COPYRIGHT
  
  Fill in
@@ -155,6 +205,6 @@ Peter Popovics, pop@technomat.hu
  
  =head1 SEE ALSO
  
-IDZebra, IDZebra::Session, Zebra documentation
+Zebra documentation, IDZebra::Session manpage.
  
  =cut
diff --git a/perl/lib/IDZebra/Session.pm b/perl/lib/IDZebra/Session.pm

index 3f402fd..d86cd0d 100644 (file)
--- a/perl/lib/IDZebra/Session.pm
+++ b/perl/lib/IDZebra/Session.pm
@@ -1,4 +1,4 @@
-# $Id: Session.pm,v 1.12 2003-03-05 00:28:16 pop Exp $
+# $Id: Session.pm,v 1.13 2003-03-05 13:55:22 pop Exp $
  # 
  # Zebra perl API header
  # =============================================================================
@@ -7,7 +7,6 @@ package IDZebra::Session;
  use strict;
  use warnings;
  
-
  BEGIN {
      use IDZebra;
      use Scalar::Util;
@@ -15,10 +14,15 @@ BEGIN {
      use IDZebra::Resultset;
      use IDZebra::ScanList;
      use IDZebra::RetrievalRecord;
-    our $VERSION = do { my @r = (q$Revision: 1.12 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; 
-#    our @ISA = qw(IDZebra::Logger);
+    require Exporter;
+    our $VERSION = do { my @r = (q$Revision: 1.13 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; 
+    our @ISA = qw(IDZebra::Logger Exporter);
+    our @EXPORT = qw (TRANS_RW TRANS_RO);
  }
  
+use constant TRANS_RW => 1;
+use constant TRANS_RO => 0;
+
  1;
  # -----------------------------------------------------------------------------
  # Class constructors, destructor
@@ -89,6 +93,11 @@ sub open {
  
      # Load the default configuration
      $self->group(%args);
+
+
+    # Set shadow usage
+    my $shadow = defined($args{shadow}) ? $args{shadow} : 0;
+    $self->shadow($shadow);
      
      $self->{odr_input} = IDZebra::odr_createmem($IDZebra::ODR_DECODE);
      $self->{odr_output} = IDZebra::odr_createmem($IDZebra::ODR_ENCODE);
@@ -335,9 +344,9 @@ sub errAdd {
  # Transaction stuff
  # -----------------------------------------------------------------------------
  sub begin_trans {
-    my ($self) = @_;
-    $self->checkzh;
-    if (my $err = IDZebra::begin_trans($self->{zh},1)) {
+    my ($self, $m) = @_;
+    $m = TRANS_RW unless (defined ($m));
+    if (my $err = IDZebra::begin_trans($self->{zh},$m)) {
         if ($self->errCode == 2) {
             croak ("TRANS_RW not allowed within TRANS_RO");
         } else {
@@ -355,29 +364,20 @@ sub end_trans {
      return ($stat);
  }
  
-sub begin_read {
-    my ($self) =@_;
-    $self->checkzh;
-    return(IDZebra::begin_read($self->{zh}));
-}
-
-sub end_read {
-    my ($self) =@_;
-    $self->checkzh;
-    IDZebra::end_read($self->{zh});
-}
-
-sub shadow_enable {
+sub shadow {
      my ($self, $value) = @_;
      $self->checkzh;
-    if ($#_ > 0) { IDZebra::set_shadow_enable($self->{zh},$value); }
+    if ($#_ > 0) { 
+       $value = 0 unless (defined($value));
+       my $r =IDZebra::set_shadow_enable($self->{zh},$value); 
+    }
      return (IDZebra::get_shadow_enable($self->{zh}));
  }
  
  sub commit {
      my ($self) = @_;
      $self->checkzh;
-    if ($self->shadow_enable) {
+    if ($self->shadow) {
         return(IDZebra::commit($self->{zh}));
      }
  }
@@ -743,9 +743,11 @@ where $sess is going to be the object representing a Zebra Session. Whenever thi
  
  This will
    - close all transactions
-  - destroy all result sets
+  - destroy all result sets and scan lists 
    - close the session
  
+Note, that if I<shadow registers> are enabled, the changes will not be committed automatically.
+
  In the future different database access methods are going to be available, 
  like:
  
@@ -761,7 +763,7 @@ You can also use the B<record group> arguments described below directly when cal
  
  If you manage different sets of records that share common characteristics, you can organize the configuration settings for each type into "groups". See the Zebra manual on the configuration file (zebra.cfg). 
  
-For each open session a default record group is assigned. You can configure it in the constructor, or by the B<set_group> method:
+For each open session a default record group is assigned. You can configure it in the constructor, or by the B<group> method:
  
    $sess->group(groupName => ..., ...)
  
@@ -821,13 +823,39 @@ Follow links when doing directory update.
  
  You can use the same parameters calling all update methods.
  
-=head1 TRANSACTIONS (WRITE LOCKS)
+=head1 TRANSACTIONS (READ / WRITE LOCKS)
  
-A transaction is a block of record update (insert / modify / delete) procedures. So, all call to such function will implicitly start a transaction, unless one is started by
+A transaction is a block of record update (insert / modify / delete) or retrieval procedures. So, every call to such a function will implicitly start a transaction, unless one is already started by
  
    $sess->begin_trans;
  
-For multiple per record updates it's efficient to start transactions explicitly: otherwise registers (system files, vocabularies, etc..) are updated one by one. After finishing all requested updates, use
+or 
+
+  $sess->begin_trans(TRANS_RW)
+
+(these two are equivalent). The effect of this call is a kind of lock: a write lock is put on the registers, so other processes trying to update the database will be blocked. If there is already an RW (Read-Write) transaction opened by another process, the I<begin_trans> call will be blocked.
+
+You can also use
+
+  $sess->begin_trans(TRANS_RO),
+
+if you would like to put on a "read lock". This one is B<deprecated>, as while you have explicitly opened a transaction for read, you can't open another one for update. For example:
+
+  $sess->begin_trans(TRANS_RO);
+  $sess->begin_trans(TRANS_RW); # invalid, die here
+  $sess->end_trans;
+  $sess->end_trans;
+
+is invalid, but
+
+  $sess->begin_trans(TRANS_RW);
+  $sess->begin_trans(TRANS_RO);
+  $sess->end_trans;
+  $sess->end_trans;
+
+is valid, but probably useless. Note again, that for each retrieval call, an RO transaction is opened. I<TRANS_RW> and I<TRANS_RO> are exported by default by IDZebra::Session.pm.
+
+For multiple per-record I<updates> it's efficient to start transactions explicitly: otherwise registers (system files, vocabularies, etc..) are updated one by one. After finishing all requested updates, use
  
    $stat = $sess->end_trans;
  
@@ -840,6 +868,35 @@ The return value is a ZebraTransactionStatus object, containing the following me
    $stat->{stime}     # System time used
    $stat->{utime}     # User time used
  
+Normally, if the perl code dies due to some runtime error, or the session is closed, then the API attempts to close all pending transactions.
+
+=head1 THE SHADOW REGISTERS
+
+The Zebra server supports updating of the index structures. That is, you can add, modify, or remove records from databases managed by Zebra without rebuilding the entire index. Since this process involves modifying structured files with various references between blocks of data in the files, the update process is inherently sensitive to system crashes, or to process interruptions: Anything but a successfully completed update process will leave the register files in an unknown state, and you will essentially have no recourse but to re-index everything, or to restore the register files from a backup medium. Further, while the update process is active, users cannot be allowed to access the system, as the contents of the register files may change unpredictably. 
+
+You can solve these problems by enabling the shadow register system in Zebra. During the updating procedure, zebraidx will temporarily write changes to the involved files in a set of "shadow files", without modifying the files that are accessed by the active server processes. If the update procedure is interrupted by a system crash or a signal, you simply repeat the procedure - the register files have not been changed or damaged, and the partially written shadow files are automatically deleted before the new updating procedure commences. 
+
+At the end of the updating procedure (or in a separate operation, if you so desire), the system enters a "commit mode". First, any active server processes are forced to access those blocks that have been changed from the shadow files rather than from the main register files; the unmodified blocks are still accessed at their normal location (the shadow files are not a complete copy of the register files - they only contain those parts that have actually been modified). If the commit process is interrupted at any point during the commit process, the server processes will continue to access the shadow files until you can repeat the commit procedure and complete the writing of data to the main register files. You can perform multiple update operations to the registers before you commit the changes to the system files, or you can execute the commit operation at the end of each update operation. When the commit phase has completed successfully, any running server processes are instructed to switch their operations to the new, operational register, and the temporary shadow files are deleted. 
+
+By default, (in the API !) the use of shadow registers is disabled. If zebra is configured that way (there is a "shadow" entry in zebra.cfg), then the shadow system can be enabled by calling:
+
+ $sess->shadow(1);
+
+or disabled by
+
+ $sess->shadow(0);
+
+If shadow system is enabled, then you have to commit changes you did, by calling:
+ 
+ $sess->commit;
+
+Note, that you can also determine shadow usage in the session constructor:
+
+ $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
+                               shadow    => 1);
+ 
+Changes to I<shadow> will not have effect, within a I<transaction> (ie.: a transaction is started either with shadow enabled or disabled). For more details, read Zebra documentation: I<Safe Updating - Using Shadow Registers>.
+
  =head1 UPDATING DATA
  
  There are two ways to update data in a Zebra database using the perl API. You can update an entire directory structure just the way it's done by zebraidx:
@@ -938,11 +995,25 @@ In order to map CQL queries to Zebra internal search structures, you have to def
  
  Or, you can directly provide the I<mapfile> parameter for the search:
  
-  my $rs1 = $sess->search(cqlmap    => 'demo/cql.map',
-                         cql       => 'dc.title=IDZebra');
+  $rs = $sess->search(cqlmap    => 'demo/cql.map',
+                     cql       => 'dc.title=IDZebra');
  
  As you see, CQL searching is so simple: just give the query in the I<cql> parameter.
  
+=head2 Sorting
+
+If you'd like the search results to be sorted, use the I<sort> parameter:
+
+  $rs = $sess->search(cql       => 'IDZebra',
+                     sort      => '1=4 ia');
+  
+Note, that B<currently> this is (almost) equivalent to
+
+  $rs = $sess->search(cql       => 'IDZebra');
+  $rs->sort('1=4 ia');
+  
+but in future versions of Zebra and this API a single phase search and sort will take place, optimizing performance. For more details on sorting, see I<IDZebra::Resultset> manpage.
+
  =head1 RESULTSETS
  
  As you have seen, the result of the search request is a I<Resultset> object.
@@ -954,7 +1025,106 @@ It contains number of hits, and search status, and can be used to sort and retri
  
  I<$rs-E<gt>errCode> is 0, if there were no errors during search. Read the I<IDZebra::Resultset> manpage for more details.
  
-=head1 MISC FUNCTIONS
+=head1 SCANNING
+
+Zebra supports scanning index values. The result of the 
+
+  $sl = $sess->scan(expression => "a");
+
+call is an I<IDZebra::ScanList> object, which you can use to list the values. The scan expression has to be provided in a PQF-like format. Examples:
+
+B< a> (scan through words of "default", "Any" indexes)
+
+
+B< @attr 1=1016 a> (same effect)
+
+
+B< @attr 1=4 @attr 6=2 a>  (scan through titles as phrases)
+
+An illegal scan expression will cause your code to die. If you'd like to select databases just for the scan call, you can optionally use the I<databases> parameter:
+
+  $sl = $sess->scan(expression => "a",
+                    databases  => [qw(demo1 demo2)]);
+  
+You can use the I<IDZebra::ScanList> object returned by the I<scan> method, to reach the result. Check I<IDZebra::ScanList> manpage for more details.
+
+=head1 SESSION STATUS AND ERRORS
+
+Most of the API calls die if an error occurs. You can avoid this by using eval {} blocks. The following methods are available to get the status of Zebra service:
+
+=over 4
+
+=item B<errCode>
+
+The Zebra provided error code... (for the result of the last call);
+
+=item B<errString>
+
+Error string corresponding to the message
+
+=item B<errAdd>
+
+Additional information for the status
+
+=back
+
+This functionality may change, see TODO.
+
+=head1 LOGGING AND MISC. FUNCTIONS
+
+Zebra provides logging facility for the internal events, and also for application developers through the API. See manpage I<IDZebra::Logger> for details.
+
+=over 4
+
+=item B<IDZebra::LogFile($filename)>
+
+Will set the output file for logging. By default it's STDERR;
+
+=item B<IDZebra::LogLevel(15)>
+
+Set log level. 0 for no logs. See IDZebra::Logger for usable flags.
+
+=back
+
+Some other functions
+
+=over 4
+
+=item B<$sess-E<gt>init>
+
+Initialize, and clean registers. This will remove all data!
+
+=item B<$sess-E<gt>compact>
+
+Compact the registers (? does this work)
+
+=item B<$sess-E<gt>show>
+
+Doesn't have too much meaning. Don't try :)
+
+=back
+
+=head1 TODO
+
+=over 4
+
+=item B<Clean up error handling>
+
+By default all zebra errors should cause die. (such situations could be avoided by using eval {}), and then check for errCode, errString... An optional flag or package variable should be introduced to override this, and skip zebra errors, to let the user decide what to do.
+
+=item B<Make the package self-distributable>
+
+Build and link with installed header and library files
+
+=item B<Testing>
+
+Test shadow system, unicode...
+
+=item B<C API>
+
+Cleanup, arrange, remove redundancy
+
+=back
  
  =head1 COPYRIGHT
  
@@ -966,6 +1136,6 @@ Peter Popovics, pop@technomat.hu
  
  =head1 SEE ALSO
  
-IDZebra, IDZebra::Data1, Zebra documentation
+Zebra documentation, IDZebra::Resultset, IDZebra::ScanList, IDZebra::Logger manpages
  
  =cut
diff --git a/perl/t/01_base.t b/perl/t/01_base.t

index 0bf814b..057b951 100644 (file)
--- a/perl/t/01_base.t
+++ b/perl/t/01_base.t
@@ -1,6 +1,6 @@
  #!perl -Tw
  # =============================================================================
-# $Id: 01_base.t,v 1.3 2003-03-05 00:28:16 pop Exp $
+# $Id: 01_base.t,v 1.4 2003-03-05 13:55:22 pop Exp $
  #
  # Perl API header
  # =============================================================================
@@ -57,10 +57,11 @@ ok(($sess->group->{databaseName} eq "demo2"),"Record group is selected");
  
  # ---------------------------------------------------------------------------
  # Transactions
-$sess->begin_read;
-eval {$sess->begin_trans;};
+$sess->begin_trans(TRANS_RO);
+eval {$sess->begin_trans(TRANS_RW);};
  ok (($@ ne ""), $@);
-$sess->end_read;
+$sess->end_trans;
+$sess->end_trans;
  
  
  
diff --git a/perl/t/02_directory_update.t b/perl/t/02_directory_update.t

index 861b71d..6ba4c3f 100644 (file)
--- a/perl/t/02_directory_update.t
+++ b/perl/t/02_directory_update.t
@@ -1,6 +1,6 @@
  #!perl
  # =============================================================================
-# $Id: 02_directory_update.t,v 1.2 2003-03-04 19:33:53 pop Exp $
+# $Id: 02_directory_update.t,v 1.3 2003-03-05 13:55:22 pop Exp $
  #
  # Perl API header
  # =============================================================================
@@ -29,7 +29,7 @@ BEGIN {
  # ----------------------------------------------------------------------------
  # Session opening and closing
  my $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
-                                 groupName => 'demo2');
+                                 groupName  => 'demo2');
  isa_ok($sess,"IDZebra::Session");
  
  SKIP: {
@@ -38,7 +38,6 @@ SKIP: {
  # ----------------------------------------------------------------------------
  # init repository
  $sess->init();
-
  # ----------------------------------------------------------------------------
  # repository upadte
  
@@ -71,7 +70,7 @@ $sess->update(groupName => 'demo1',
  
  $stat = $sess->end_trans;
  ok(($stat->{inserted} == $filecount), 
-   "Inserted $stat->{inserted}/$filecount records");
+   "Inserted $stat->{inserted}/$filecount records with shadow");
  
  ok(($sess->group->{databaseName} eq "demo2"),"Original group is selected");
  
diff --git a/perl/t/03_record_update.t b/perl/t/03_record_update.t

index 15f0baf..ae5f865 100644 (file)
--- a/perl/t/03_record_update.t
+++ b/perl/t/03_record_update.t
@@ -1,6 +1,6 @@
  #!perl
  # =============================================================================
-# $Id: 03_record_update.t,v 1.1 2003-03-03 00:44:39 pop Exp $
+# $Id: 03_record_update.t,v 1.2 2003-03-05 13:55:22 pop Exp $
  #
  # Perl API header
  # =============================================================================
@@ -29,7 +29,8 @@ BEGIN {
  # ----------------------------------------------------------------------------
  # Session opening and closing
  my $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
-                                 groupName => 'demo2');
+                                 groupName => 'demo2',
+                                 shadow    => 1);
  isa_ok($sess,"IDZebra::Session");
  
  # ----------------------------------------------------------------------------
@@ -69,4 +70,5 @@ ok(($stat->{inserted} == 1), "Inserted 1 records");
  # ----------------------------------------------------------------------------
  # Close session
  
+$sess->commit;
  $sess->close;
author	pop <pop>
	Wed, 5 Mar 2003 13:55:21 +0000 (13:55 +0000)
committer	pop <pop>
	Wed, 5 Mar 2003 13:55:21 +0000 (13:55 +0000)
perl/IDZebra.i		patch \| blob \| history
perl/IDZebra_wrap.c		patch \| blob \| history
perl/README		patch \| blob \| history
perl/demo/index.sh		patch \| blob \| history
perl/demo/zebra.cfg		patch \| blob \| history
perl/lib/IDZebra.pm		patch \| blob \| history
perl/lib/IDZebra/Filter.pm		patch \| blob \| history
perl/lib/IDZebra/Resultset.pm		patch \| blob \| history
perl/lib/IDZebra/RetrievalRecord.pm		patch \| blob \| history
perl/lib/IDZebra/ScanEntry.pm		patch \| blob \| history
perl/lib/IDZebra/ScanList.pm		patch \| blob \| history
perl/lib/IDZebra/Session.pm		patch \| blob \| history
perl/t/01_base.t		patch \| blob \| history
perl/t/02_directory_update.t		patch \| blob \| history
perl/t/03_record_update.t		patch \| blob \| history